In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
from itertools import chain
from time import time
import shutil
import argparse
import configparser
from model.ASTGCN_r import make_model
from model.LSTM import LSTM
from lib.utils import load_graphdata_channel1, get_adjacency_matrix, compute_val_loss_mstgcn_lstm, predict_and_save_results_mstgcn_lstm
from tensorboardX import SummaryWriter

In [5]:
config = configparser.ConfigParser()
config.read("configurations/TMCOVID_astgcn.conf")
data_config = config['Data']
training_config = config['Training']

adj_filename = data_config['adj_filename']
graph_signal_matrix_filename = data_config['graph_signal_matrix_filename']
if config.has_option('Data', 'id_filename'):
    id_filename = data_config['id_filename']
else:
    id_filename = None

num_of_vertices = int(data_config['num_of_vertices'])
points_per_hour = int(data_config['points_per_hour'])
num_for_predict = int(data_config['num_for_predict'])
len_input = int(data_config['len_input'])
dataset_name = data_config['dataset_name']

model_name = training_config['model_name']

ctx = training_config['ctx']
os.environ["CUDA_VISIBLE_DEVICES"] = ctx
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda:0' if USE_CUDA else "cpu")
print("CUDA:", USE_CUDA, DEVICE)

learning_rate = float(training_config['learning_rate'])
epochs = int(training_config['epochs'])
start_epoch = int(training_config['start_epoch'])
batch_size = int(training_config['batch_size'])
num_of_weeks = int(training_config['num_of_weeks'])
num_of_days = int(training_config['num_of_days'])
num_of_hours = int(training_config['num_of_hours'])
time_strides = 1 # num_of_hours
nb_chev_filter = int(training_config['nb_chev_filter'])
nb_time_filter = int(training_config['nb_time_filter'])
in_channels = int(training_config['in_channels'])
nb_block = int(training_config['nb_block'])
K = int(training_config['K'])

folder_dir = '%s_h%dd%dw%d_channel%d_%e' % (model_name, num_of_hours, num_of_days, num_of_weeks, in_channels, learning_rate)
print('folder_dir:', folder_dir)
params_path = os.path.join('experiments', dataset_name, folder_dir)
print('params_path:', params_path)

CUDA: True cuda:0
folder_dir: astgcn_r_h2d2w1_channel2_1.000000e-03
params_path: experiments/TM_COVID/astgcn_r_h2d2w1_channel2_1.000000e-03


In [3]:
train_loader, train_target_tensor, val_loader, val_target_tensor, test_loader, test_target_tensor, _mean, _std = load_graphdata_channel1(
    graph_signal_matrix_filename, num_of_hours,
    num_of_days, num_of_weeks, DEVICE, batch_size, True, True)

load file: ./data/TMCOVID/In_Out_Flow_202001_to_202105_r2_d2_w1_astcgn
train: torch.Size([6413, 119, 2, 15]) torch.Size([6413, 119, 3])
val: torch.Size([2138, 119, 2, 15]) torch.Size([2138, 119, 3])
test: torch.Size([2138, 119, 2, 15]) torch.Size([2138, 119, 3])


In [5]:
list(map(lambda x: x.shape, next(iter(train_loader)))), 
list(map(lambda x: x.shape, next(iter(val_loader)))), 
list(map(lambda x: x.shape, next(iter(test_loader))))

[torch.Size([32, 119, 2, 15]),
 torch.Size([32, 119, 3]),
 torch.Size([32, 7, 4])]

In [6]:
adj_mx, distance_mx = get_adjacency_matrix(adj_filename, num_of_vertices, id_filename)

In [15]:
net = make_model(DEVICE, nb_block, in_channels, K, nb_chev_filter, nb_time_filter, time_strides, adj_mx,
                 num_for_predict, len_input, num_of_vertices)

lstm_net = LSTM(input_dim=4, hidden_dim=50, num_layers=1, output_dim=119)

In [16]:
sub_train_inputs, sub_train_target, sub_covid_inputs = next(iter(train_loader))

In [12]:
DEVICE, nb_block, in_channels, K

(device(type='cpu'), 1, 2, 2)

In [13]:
time_strides, num_for_predict, len_input, num_of_vertices

(2, 3, 15, 119)

In [11]:
sub_train_inputs.shape

torch.Size([32, 119, 2, 15])

In [17]:
net(sub_train_inputs)

tensor([[[-0.0096, -0.0341, -2.6823],
         [-0.7232, -1.4416, -1.3746],
         [-0.5761, -0.3826, -2.4154],
         ...,
         [-0.8678, -1.3409, -1.4797],
         [-0.9404, -1.3939, -1.4266],
         [ 0.1402, -0.1418, -2.7332]],

        [[-0.1329, -0.0467,  0.9250],
         [-1.0710, -1.5668, -1.5094],
         [-1.2921, -1.9157, -0.5195],
         ...,
         [-1.0002, -1.5352, -1.3135],
         [-0.9398, -1.5320, -1.2586],
         [-0.3146, -0.4813,  0.7941]],

        [[ 0.0310, -1.0097, -2.8949],
         [-0.7510, -1.5188, -1.4478],
         [-0.2905, -1.3012, -2.3653],
         ...,
         [-0.9020, -1.3783, -1.4374],
         [-0.9409, -1.3939, -1.4027],
         [-0.1492, -1.0596, -2.6145]],

        ...,

        [[ 0.5830,  0.0898,  0.8178],
         [-0.9678, -1.7770, -1.5016],
         [-0.4201, -1.7359, -0.3251],
         ...,
         [-1.0158, -1.6495, -1.2925],
         [-0.9447, -1.5363, -1.2988],
         [ 0.1693, -0.5760,  0.6166]],

        [[

In [17]:
sub_train_inputs.shape

torch.Size([32, 119, 2, 15])

In [14]:
def train_main():
    if (start_epoch == 0) and (not os.path.exists(params_path)):
        os.makedirs(params_path)
        print('create params directory %s' % (params_path))
    elif (start_epoch == 0) and (os.path.exists(params_path)):
        shutil.rmtree(params_path)
        os.makedirs(params_path)
        print('delete the old one and create params directory %s' % (params_path))
    elif (start_epoch > 0) and (os.path.exists(params_path)):
        print('train from params directory %s' % (params_path))
    else:
        raise SystemExit('Wrong type of model!')

    print('param list:')
    print('CUDA\t', DEVICE)
    print('in_channels\t', in_channels)
    print('nb_block\t', nb_block)
    print('nb_chev_filter\t', nb_chev_filter)
    print('nb_time_filter\t', nb_time_filter)
    print('time_strides\t', time_strides)
    print('batch_size\t', batch_size)
    print('graph_signal_matrix_filename\t', graph_signal_matrix_filename)
    print('start_epoch\t', start_epoch)
    print('epochs\t', epochs)

    criterion = nn.MSELoss().to(DEVICE)
    optimizer = optim.Adam(chain(lstm_net.parameters(), net.parameters()), lr=learning_rate)
    sw = SummaryWriter(logdir=params_path, flush_secs=5)
    
    total_param = 0
    for param_tensor in net.state_dict():
        total_param += np.prod(net.state_dict()[param_tensor].size())
    print('Net\'s total params:', total_param)
    
    total_param = 0
    for param_tensor in lstm_net.state_dict():
        total_param += np.prod(lstm_net.state_dict()[param_tensor].size())
    print('LSTM-Net\'s total params:', total_param)

    global_step = 0
    best_epoch = 0
    best_val_loss = np.inf

    start_time = time()

    if start_epoch > 0:

        params_filename = os.path.join(params_path, 'epoch_%s.params' % start_epoch)

        net.load_state_dict(torch.load(params_filename))

        print('start epoch:', start_epoch)

        print('load weight from: ', params_filename)

    # train model
    for epoch in range(start_epoch, epochs):

        params_filename = os.path.join(params_path, 'epoch_%s.params' % epoch)

        val_loss = compute_val_loss_mstgcn_lstm(net, lstm_net, val_loader, criterion, sw, epoch)
        # val_loss = compute_val_loss_mstgcn(net, val_loader, criterion, sw, epoch)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_epoch = epoch
            torch.save(net.state_dict(), params_filename)
            print('save parameters to file: %s' % params_filename)

        net.train()  # ensure dropout layers are in train mode
        lstm_net.train()

        for batch_index, batch_data in enumerate(train_loader):

            encoder_inputs, labels, covid_inputs = batch_data
            
            optimizer.zero_grad()

            outputs = net(encoder_inputs)
            
            print("encoder_inputs shape: ", encoder_inputs.shape)
            print("outputs shape: ", outputs.shape)
            
            covid_weights = lstm_net(covid_inputs).unsqueeze(2)
            
            print("covid_weights shape: ", covid_weights.shape)
            final_outputs = outputs + covid_weights * outputs
            print("final_outputs shape: ", final_outputs.shape)
            loss = criterion(final_outputs, labels)

            loss.backward()

            optimizer.step()

            training_loss = loss.item()

            global_step += 1

            sw.add_scalar('training_loss', training_loss, global_step)

            if global_step % 1000 == 0:

                print('global step: %s, training loss: %.2f, time: %.2fs' % (global_step, training_loss, time() - start_time))

    print('best epoch:', best_epoch)

    # apply the best model on the test set
    predict_main(best_epoch, test_loader, test_target_tensor, _mean, _std, 'test')

def predict_main(global_step, data_loader, data_target_tensor, _mean, _std, type):
    '''

    :param global_step: int
    :param data_loader: torch.utils.data.utils.DataLoader
    :param data_target_tensor: tensor
    :param mean: (1, 1, 3, 1)
    :param std: (1, 1, 3, 1)
    :param type: string
    :return:
    '''

    params_filename = os.path.join(params_path, 'epoch_%s.params' % global_step)
    print('load weight from:', params_filename)

    net.load_state_dict(torch.load(params_filename))

    predict_and_save_results_mstgcn_lstm(net, lstm_net, data_loader, data_target_tensor, global_step, _mean, _std, params_path, type)

In [15]:
train_main()

delete the old one and create params directory experiments/TM_COVID/astgcn_r_h2d2w1_channel2_1.000000e-03
param list:
CUDA	 cpu
in_channels	 2
nb_block	 1
nb_chev_filter	 8
nb_time_filter	 8
time_strides	 2
batch_size	 32
graph_signal_matrix_filename	 ./data/TMCOVID/In_Out_Flow_202001_to_202105.npy
start_epoch	 0
epochs	 80
Net's total params: 29621
LSTM-Net's total params: 17269


RuntimeError: Given groups=1, weight of size [3, 7, 1, 8], expected input[32, 8, 119, 8] to have 7 channels, but got 8 channels instead