In [1]:
from stgraph_trainer.datasets import load_province_temporal_data
from stgraph_trainer.datasets import load_province_coordinates
from stgraph_trainer.datasets import preprocess_data_for_stgnn
from stgraph_trainer.utils import PairDataset
from stgraph_trainer.utils import compute_metrics
from stgraph_trainer.utils import matplotlib_plot_font
from stgraph_trainer.utils import save_figure_predict
from torch.utils.data import DataLoader
from stgraph_trainer.models import ProposedSTGNN
from stgraph_trainer.trainers import ProposedSTGNNTrainer
import torch
import numpy as np
import pandas as pd
import dgl
import scipy.sparse as sp

torch.manual_seed(42)
matplotlib_plot_font()

  from .autonotebook import tqdm as notebook_tqdm
Using TensorFlow backend.


In [2]:
import numpy as np
import scipy.sparse as sp
import torch
from torch.nn.functional import normalize


def calculate_laplacian_with_self_loop(matrix):
    matrix = matrix + torch.eye(matrix.size(0))
    row_sum = matrix.sum(1)
    d_inv_sqrt = torch.pow(row_sum, -0.5).flatten()
    d_inv_sqrt[torch.isinf(d_inv_sqrt)] = 0.0
    d_mat_inv_sqrt = torch.diag(d_inv_sqrt)
    normalized_laplacian = (
        matrix.matmul(d_mat_inv_sqrt).transpose(0, 1).matmul(d_mat_inv_sqrt)
    )
    return normalized_laplacian

In [3]:
import argparse
import torch
import torch.nn as nn


class TGCNGraphConvolution(nn.Module):
    def __init__(self, adj, num_gru_units: int, output_dim: int, bias: float = 0.0):
        super(TGCNGraphConvolution, self).__init__()
        self._num_gru_units = num_gru_units
        self._output_dim = output_dim
        self._bias_init_value = bias
        self.register_buffer(
            "laplacian", calculate_laplacian_with_self_loop(torch.FloatTensor(adj))
        )
        self.weights = nn.Parameter(
            torch.FloatTensor(self._num_gru_units + 1, self._output_dim)
        )
        self.biases = nn.Parameter(torch.FloatTensor(self._output_dim))
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.weights)
        nn.init.constant_(self.biases, self._bias_init_value)

    def forward(self, inputs, hidden_state):
        batch_size, num_nodes = inputs.shape
        # inputs (batch_size, num_nodes) -> (batch_size, num_nodes, 1)
        inputs = inputs.reshape((batch_size, num_nodes, 1))
        # hidden_state (batch_size, num_nodes, num_gru_units)
        hidden_state = hidden_state.reshape(
            (batch_size, num_nodes, self._num_gru_units)
        )
        # [x, h] (batch_size, num_nodes, num_gru_units + 1)
        concatenation = torch.cat((inputs, hidden_state), dim=2)
        # [x, h] (num_nodes, num_gru_units + 1, batch_size)
        concatenation = concatenation.transpose(0, 1).transpose(1, 2)
        # [x, h] (num_nodes, (num_gru_units + 1) * batch_size)
        concatenation = concatenation.reshape(
            (num_nodes, (self._num_gru_units + 1) * batch_size)
        )
        # A[x, h] (num_nodes, (num_gru_units + 1) * batch_size)
        a_times_concat = self.laplacian @ concatenation
        # A[x, h] (num_nodes, num_gru_units + 1, batch_size)
        a_times_concat = a_times_concat.reshape(
            (num_nodes, self._num_gru_units + 1, batch_size)
        )
        # A[x, h] (batch_size, num_nodes, num_gru_units + 1)
        a_times_concat = a_times_concat.transpose(0, 2).transpose(1, 2)
        # A[x, h] (batch_size * num_nodes, num_gru_units + 1)
        a_times_concat = a_times_concat.reshape(
            (batch_size * num_nodes, self._num_gru_units + 1)
        )
        # A[x, h]W + b (batch_size * num_nodes, output_dim)
        outputs = a_times_concat @ self.weights + self.biases
        # A[x, h]W + b (batch_size, num_nodes, output_dim)
        outputs = outputs.reshape((batch_size, num_nodes, self._output_dim))
        # A[x, h]W + b (batch_size, num_nodes * output_dim)
        outputs = outputs.reshape((batch_size, num_nodes * self._output_dim))
        return outputs

    @property
    def hyperparameters(self):
        return {
            "num_gru_units": self._num_gru_units,
            "output_dim": self._output_dim,
            "bias_init_value": self._bias_init_value,
        }


class TGCNCell(nn.Module):
    def __init__(self, adj, input_dim: int, hidden_dim: int):
        super(TGCNCell, self).__init__()
        self._input_dim = input_dim
        self._hidden_dim = hidden_dim
        self.register_buffer("adj", torch.FloatTensor(adj))
        self.graph_conv1 = TGCNGraphConvolution(
            self.adj, self._hidden_dim, self._hidden_dim * 2, bias=1.0
        )
        self.graph_conv2 = TGCNGraphConvolution(
            self.adj, self._hidden_dim, self._hidden_dim
        )

    def forward(self, inputs, hidden_state):
        # [r, u] = sigmoid(A[x, h]W + b)
        # [r, u] (batch_size, num_nodes * (2 * num_gru_units))
        concatenation = torch.sigmoid(self.graph_conv1(inputs, hidden_state))
        # r (batch_size, num_nodes, num_gru_units)
        # u (batch_size, num_nodes, num_gru_units)
        r, u = torch.chunk(concatenation, chunks=2, dim=1)
        # c = tanh(A[x, (r * h)W + b])
        # c (batch_size, num_nodes * num_gru_units)
        c = torch.tanh(self.graph_conv2(inputs, r * hidden_state))
        # h := u * h + (1 - u) * c
        # h (batch_size, num_nodes * num_gru_units)
        new_hidden_state = u * hidden_state + (1.0 - u) * c
        return new_hidden_state, new_hidden_state

    @property
    def hyperparameters(self):
        return {"input_dim": self._input_dim, "hidden_dim": self._hidden_dim}


class TGCN(nn.Module):
    def __init__(self, adj, hidden_dim: int, **kwargs):
        super(TGCN, self).__init__()
        self._input_dim = adj.shape[0]
        self._hidden_dim = hidden_dim
        self.register_buffer("adj", torch.FloatTensor(adj))
        self.tgcn_cell = TGCNCell(self.adj, self._input_dim, self._hidden_dim)

    def forward(self, inputs):
        batch_size, num_nodes, seq_len = inputs.shape
        assert self._input_dim == num_nodes
        hidden_state = torch.zeros(batch_size, num_nodes * self._hidden_dim).type_as(
            inputs
        )
        output = None
        for i in range(seq_len):
            output, hidden_state = self.tgcn_cell(inputs[:, :, i], hidden_state)
            output = output.reshape((batch_size, num_nodes, self._hidden_dim))
        return output

    @staticmethod
    def add_model_specific_arguments(parent_parser):
        parser = argparse.ArgumentParser(parents=[parent_parser], add_help=False)
        parser.add_argument("--hidden_dim", type=int, default=64)
        return parser

    @property
    def hyperparameters(self):
        return {"input_dim": self._input_dim, "hidden_dim": self._hidden_dim}

In [4]:
MODEL_NAME = "STGNN_1"
TIME_STEPS = 5
BATCH_SIZE = 16
EPOCHS = 150
learning_rate = 1e-3
device = torch.device('cuda', 0) if torch.cuda.is_available() else torch.device('cpu')

region_type = 'state'
df = pd.read_csv(f'/Users/jeonjunhwi/문서/Projects/Master_GNN/Data/KCDC_data/Processing_Results/smoothing_3_{region_type}_mean.csv', index_col=0, encoding='cp949')
df = df.iloc[100:700] # 12월 까지만 해보자

region_dict = {}
for i, region in enumerate(df.columns):
    region_dict[i] = region
    
len_val = int(df.shape[0] * 0.1)
len_test = 25
len_train = df.shape[0] - len_val - len_test

train, val, test, _, _, scaler = preprocess_data_for_stgnn(data=df,
                                                           len_train=len_train,
                                                           len_val=len_val,
                                                           len_test=len_test,
                                                           time_steps=TIME_STEPS)

X_train, y_train = train[0], train[1]
X_val, y_val = val[0], val[1]
X_test, y_test = test[0], test[1]

print('train date : ~', df.index[len_train])
print('val date : ~', df.index[len_train + len_val])
print('val date : ~', df.index[-1])


X_train = torch.tensor(X_train, dtype=torch.float32).unsqueeze(-1)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(-1)
X_val = torch.tensor(X_val, dtype=torch.float32).unsqueeze(-1)
y_val = torch.tensor(y_val, dtype=torch.float32).unsqueeze(-1)
X_test = torch.tensor(X_test, dtype=torch.float32).unsqueeze(-1)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(-1)
n_test_samples = len(y_test)

train_dl = DataLoader(PairDataset(X_train, y_train),
                      batch_size=BATCH_SIZE,
                      shuffle=True)

val_dl = DataLoader(PairDataset(X_val, y_val),
                      batch_size=BATCH_SIZE,
                      shuffle=False)

test_dl = DataLoader(PairDataset(X_test, y_test),
                      batch_size=1,
                      shuffle=False)

from stgraph_trainer.datasets import Data2Graph
import pandas as pd
# region_type = 'state'
# graph_type = f'dist_01_{region_type}'
dist_mx = pd.read_csv(f'data/distances_kr_{region_type}_adj_mx.csv', encoding='cp949', index_col=0)
norm = 0.5
data2network = Data2Graph(distance_matrix = dist_mx, temporal_data = df)
G, adj_mx, graph_type = data2network.make_network(network_type='dist-corr',
                                                    region_type=region_type,
                                                    norm=norm,
                                                    int_adj=False) # 대체로 False가 더 좋았음.

train date : ~ 2021-09-27
val date : ~ 2021-11-26
val date : ~ 2021-12-20


In [5]:
from stgraph_trainer.trainers import TGCNTrainer

In [10]:
model = TGCN(adj=adj_mx, hidden_dim=1)
save_path = 'tgcnmodel.pt'
loss_func = torch.nn.MSELoss()
optimizer = torch.optim.NAdam(model.parameters(), lr=learning_rate)
trainer = TGCNTrainer(model = model,
                       train_loader = train_dl,
                       val_loader = val_dl,
                       test_loader = X_test.reshape(-1, len(df.columns),TIME_STEPS),
                       loss = loss_func,
                       optimizer = optimizer,
                       scaler = scaler,
                       device = device,
                       save_path = save_path,
                       length = len(df.columns),
                       TIME_STEPS = TIME_STEPS,
                       callbacks=None,
                       raw_test=df.iloc[-(n_test_samples + 1):].values)
                       

In [11]:
trainer.train(epochs=50)

4.869941711425781
Epoch: 1; Elapsed time: 0.09280896186828613; Train loss: 1.084094; Val MSE: 4.869942; Val loss RMSE: 2.206794
4.865755844116211
Epoch: 2; Elapsed time: 0.0784459114074707; Train loss: 1.079653; Val MSE: 4.865756; Val loss RMSE: 2.205846
4.86202914498069
Epoch: 3; Elapsed time: 0.07629609107971191; Train loss: 1.076621; Val MSE: 4.862029; Val loss RMSE: 2.205001
4.85845609144731
Epoch: 4; Elapsed time: 0.07417893409729004; Train loss: 1.077370; Val MSE: 4.858456; Val loss RMSE: 2.204191
4.854772342335094
Epoch: 5; Elapsed time: 0.07329702377319336; Train loss: 1.076144; Val MSE: 4.854772; Val loss RMSE: 2.203355
4.851097627119585
Epoch: 6; Elapsed time: 0.07080602645874023; Train loss: 1.073410; Val MSE: 4.851098; Val loss RMSE: 2.202521
4.847558819163929
Epoch: 7; Elapsed time: 0.07640385627746582; Train loss: 1.072046; Val MSE: 4.847559; Val loss RMSE: 2.201717
4.843824612010609
Epoch: 8; Elapsed time: 0.07169508934020996; Train loss: 1.066396; Val MSE: 4.843825; Val

{'epoch': [1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50],
 'train_loss': [1.0840944461524487,
  1.0796530032530427,
  1.0766206262633204,
  1.0773702133446932,
  1.076144015416503,
  1.0734095992520452,
  1.072046497836709,
  1.0663961358368397,
  1.0646966444328427,
  1.0603337539359927,
  1.0611246400512755,
  1.0562232602387667,
  1.0537425158545375,
  1.0516903614625335,
  1.0528460079804063,
  1.049127984791994,
  1.0482208440080285,
  1.0432165628299117,
  1.0452803932130337,
  1.041059397161007,
  1.03701829072088,
  1.034503124654293,
  1.032379132695496,
  1.0294081214815378,
  1.026275352574885,
  1.0248514954000711,
  1.022423004731536,
  1.0204979116097093,
  1.0193428806960583,
  1.0184711227193475,
  1.0158028323203325,
  1.013562735

In [12]:
trainer.predict()

torch.Size([19, 17, 1])


In [None]:
# Compute RMSE of test dataset
RMSE, RMSE_total = compute_metrics(df.iloc[-n_test_samples:], y_pred, metric='rmse')
MAE, MAE_total = compute_metrics(df.iloc[-n_test_samples:], y_pred, metric='mae')
MAPE, MAPE_total = compute_metrics(df.iloc[-n_test_samples:], y_pred, metric='mape')

df.iloc[-n_test_samples:].to_csv('Result/pred/ground_truth.csv', encoding='cp949')

NameError: name 'y_pred' is not defined

In [None]:
# corr 0.5 -dist
matplotlib_plot_font()
save_figure_predict(df, y_pred,
                    len_train, len_val, len_test-1, TIME_STEPS,
                    region_dict, suptitle_,date_split,
                    MAE, MAPE, RMSE,
                    MAE_total, MAPE_total, RMSE_total, 
                    'Result/')