# Programa principal para la ejecución del modelo de aprendizaje basado en grafos

---



*   Este notebook servirá como entorno de prueba del proyecto: https://dataverse.harvard.edu/dataverse/atlas.
*   El resultado serán dos ficheros csv. El primero contiene los datos, mientras que el segundo tendrá la matriz de pesos del grafo.
*   Los datos serán procesados (eliminación de duplicados, valores vacíos, columnas/filas...) y normalizados.

In [1]:
from os.path import join as pjoin
from tensorflow.compat import v1 as tf
from google.colab import drive
import tensorflow
import argparse
import sys
import os

# Restrict CUDA to device 0; this is set before the session below is created.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# TF1-compat session configuration: let GPU memory grow on demand.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# NOTE(review): the session is created but not bound to a name; presumably
# only its side effect on GPU initialisation is wanted - confirm.
tf.Session(config=config)

# Mount Google Drive (Colab only) and make the project repository importable.
# NOTE(review): this path uses 'MyDrive' while the data paths used later use
# 'My Drive' - confirm both resolve to the same folder.
drive.mount('/gdrive', force_remount=True)
sys.path.append('/gdrive/MyDrive/Master IA/TFM/Predicting-international-trade-with-Graph-Neural-Networks')

Mounted at /gdrive


In [2]:
from utils.math_graph import *
from data_loader.data_utils import *
from models.trainer import model_train
from models.tester import model_test

### Pruebas

### Main

In [8]:
# Hyperparameters declared as (flag, type, default) rows and registered in this
# order, so the parsed Namespace lists its attributes in the same sequence.
_ARG_SPECS = (
    ('--n_route', int, 600),              # previous default: 228
    ('--n_his', int, 10),                 # previous default: 13
    ('--n_pred', int, 6),                 # previous default: 3
    ('--batch_size', int, 1),             # original note: default=50, previously 1, with a batch size of 2
    ('--epoch', int, 5),                  # previous default: 50
    ('--save', int, 1),
    ('--ks', int, 4),
    ('--kt', int, 3),
    ('--lr', float, 1e-3),
    ('--opt', str, 'ADAM'),               # alternative: RMSProp
    ('--graph', str, 'default'),
    ('--inf_mode', str, 'merge'),         # 'sep' for n_pred < 3, but that causes problems later
    ('--normalization', str, 'z_score'),  # z_score, robust, log_scale or none
)

parser = argparse.ArgumentParser()
for _flag, _arg_type, _arg_default in _ARG_SPECS:
    parser.add_argument(_flag, type=_arg_type, default=_arg_default)

# parse_known_args ignores unrecognised argv entries (such as those a notebook
# kernel is launched with); they are returned separately in `unknown`.
args, unknown = parser.parse_known_args()
print(f'Training configs: {args}')

# Short aliases for the parsed hyperparameters used below and in later cells.
n, n_his, n_pred = args.n_route, args.n_his, args.n_pred
# Ks is passed to cheb_poly_approx below; Kt is unpacked but not used in this cell.
Ks, Kt = args.ks, args.kt
# blocks: settings of channel size in st_conv_blocks / bottleneck design

blocks = [[1, 512, 1024], [1024, 1024, 2048]]

# Load weighted adjacency matrix W
if args.graph == 'default':
    # Default graph: the weights CSV stored on the mounted Google Drive.
    W = weight_matrix('/gdrive/My Drive/Master IA/TFM/International Trade Data/Weights.csv')
else:
    # Load a customized graph weight matrix from ./dataset/<args.graph>
    W = weight_matrix(pjoin('./dataset', args.graph))

# Calculate graph kernel
L = scaled_laplacian(W)
# Alternative approximation method: 1st approx - first_approx(W, n).
Lk = cheb_poly_approx(L, Ks, n)

# Register the kernel as a float32 constant in the 'graph_kernel' collection
# (presumably read back by the model code - confirm in models/).
tf.add_to_collection(name='graph_kernel', value=tf.cast(tf.constant(Lk), tf.float32))

# Data Preprocessing
# n_his + n_pred == n_frame: the number of rows placed in each series
# (16 with the defaults n_his=10 and n_pred=6).
# n_train + n_val + n_test == length of a "day" - n_frame. In this case a "day"
# is the whole base, so its length is 26.

data_file = '/gdrive/My Drive/Master IA/TFM/International Trade Data/Data.csv'
# TODO: check what happens when the validation or test split is 0
n_train, n_val, n_test = 5, 5, 0
# If you are using interpolation:
# 20, 0, 1 You are testing 2020!
# 16, 0, 1 You are testing 2016!

# If you are not using interpolation:
# 6, 0, 1 You are testing 2016!

interpolation = False
PeMS, values = data_gen(data_file, (n_train, n_val, n_test, args.normalization), n, n_his + n_pred, interpolation)
# TODO: find out when the stats summary works and when it does not
try:
    dataset_stats = PeMS.get_stats()
    print(f">> Loading dataset with Mean: {dataset_stats['mean']:.2f}, STD: {dataset_stats['std']:.2f}")
except Exception as err:
    # Catch Exception rather than using a bare except so KeyboardInterrupt and
    # SystemExit still propagate, and report the cause instead of hiding it.
    print("Dataset column by column, or an error has appeared, be sure to check! "
          f"({type(err).__name__}: {err})")


Training configs: Namespace(n_route=600, n_his=10, n_pred=6, batch_size=1, epoch=5, save=1, ks=4, kt=3, lr=0.001, opt='ADAM', graph='default', inf_mode='merge', normalization='z_score')
(600, 600)
The input graph is a 0/1 matrix; set "scaling" to False.
0 16
1 17
2 18
3 19
4 20
5 21
6 22
7 23
8 24
9 25
>> Loading dataset with Mean: 9021642374.97, STD: 23669712377.19


In [10]:
# Inspect the first 8 entries of column 0 of `values` (second object returned by data_gen).
values[0:8, 0]

array([3.47444292e+08, 3.70980534e+08, 4.75072765e+08, 5.40751568e+08,
       5.35313758e+08, 4.64327555e+08, 4.03222911e+08, 4.50592528e+08])

In [5]:
# Inspect the first 30 entries of column 0 of `values`.
# NOTE(review): the saved output shows three evenly spaced points between the
# values printed for values[0:8, 0], so it presumably comes from a run with
# interpolation enabled - confirm and re-run the notebook in order.
values[0:30, 0]

array([3.47444292e+08, 3.53328352e+08, 3.59212413e+08, 3.65096474e+08,
       3.70980534e+08, 3.97003592e+08, 4.23026650e+08, 4.49049707e+08,
       4.75072765e+08, 4.91492466e+08, 5.07912166e+08, 5.24331867e+08,
       5.40751568e+08, 5.39392116e+08, 5.38032663e+08, 5.36673210e+08,
       5.35313758e+08, 5.17567207e+08, 4.99820656e+08, 4.82074106e+08,
       4.64327555e+08, 4.49051394e+08, 4.33775233e+08, 4.18499072e+08,
       4.03222911e+08, 4.15065315e+08, 4.26907720e+08, 4.38750124e+08,
       4.50592528e+08, 4.44024793e+08])

In [4]:
# Train using the dataset, channel layout and hyperparameters prepared in the main cell.
# Per the saved log, a checkpoint is written under ./output/models/ after each epoch.
model_train(PeMS, blocks, args)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Tamaño de x Tensor("strided_slice:0", shape=(None, 10, 600, 1), dtype=float32)
Nuevo x tras un bloque convolucional:  Tensor("dropout/SelectV2:0", shape=(None, 6, 600, 1024), dtype=float32)
Tamaño de x Tensor("dropout/SelectV2:0", shape=(None, 6, 600, 1024), dtype=float32)
Nuevo x tras un bloque convolucional:  Tensor("dropout_1/SelectV2:0", shape=(None, 2, 600, 2048), dtype=float32)
Epoch  0, Step   0: [3929.589, 5.389]
Epoch: 0, Training Time: 00:00:55
Time Step 1: MAPE 393.538%; MAE  0.733; RMSE  1.427.
Time Step 2: MAPE 284.377%; MAE  0.769; RMSE  1.448.
Epoch: 0, Inference Time: 00:00:19
<< Saving model to ./output/models/STGCN-5 ...
Epoch  1, Step   0: [237.730, 4.900]
Epoch: 1, Training Time: 00:00:54
Time Step 1: MAPE 305.702%; MAE  0.494; RMSE  1.007.
Time Step 2: MAPE 254.262%; MAE  0.529; RMSE  1.107.
Epoch: 1, Inference Time: 00:00:18
<< Saving model to ./output/models/STGCN-10 ...
Epoch  2, Step   0: [111.644, 4.900]
Epoch: 2, Training Time: 00:00:53
Time Step 1: MAPE 191.

In [5]:
# Evaluate the saved model on the test split. The test-split length is passed
# as the second argument (presumably the batch size - confirm in models/tester).
# The inference mode comes from the parsed options instead of a hard-coded
# 'merge', so overriding --inf_mode takes effect; the default is still 'merge'.
model_test(PeMS, PeMS.get_len('test'), n_his, n_pred, args.inf_mode)

>> Loading saved model from ./output/models/STGCN-25 ...
<class 'list'>
Time Step 1: MAPE 113.910%; MAE  0.237; RMSE  0.943.
Time Step 2: MAPE 125.948%; MAE  0.372; RMSE  0.968.
Model Test Time: 00:00:19
Testing model finished!


In [35]:
# Take the first test sequence, keep positions 10 onward on the second axis and
# index 0 on the last axis.
# NOTE(review): assumes layout (sequence, frame, node, channel) and that 10
# equals n_his, i.e. only the predicted frames are kept - TODO confirm.
seq =  PeMS.get_data('test')[0:1, 10:, :, 0]

In [None]:
import pandas as pd

In [None]:
# Wrap the single selected sequence (leading axis dropped) in a DataFrame.
x_guardarZ = pd.DataFrame(seq[0, :, :])

In [None]:
# Sanity-check the DataFrame dimensions.
x_guardarZ.shape

(3, 600)

In [1]:
import numpy as np

In [11]:
# Three small 2x3 integer matrices used as toy inputs for the concatenation
# experiment in the following cells.
# (1-D variant tried earlier: y = np.array([1, 2, 3]); z = np.array([4, 5, 6]).)
y, z, v = (
    np.array(rows)
    for rows in (
        [[1, 2, 3], [11, 12, 13]],
        [[4, 5, 6], [44, 55, 66]],
        [[7, 8, 9], [44, 55, 66]],
    )
)

# Collect the arrays in a list, the form np.concatenate takes.
x = [y, z, v]

In [13]:
# Join the arrays in `x` along the last axis (side by side for these 2-D inputs)
# and print the combined array.
listx =  np.concatenate(x, axis=-1)
print(listx)

[[ 1  2  3  4  5  6  7  8  9]
 [11 12 13 44 55 66 44 55 66]]


In [25]:
# Scratch experiment: zero-based indices of every third step up to a horizon of 9.
# A dedicated name is used for the horizon so this cell does not overwrite the
# global `n_pred` that the training and testing cells read.
scratch_n_pred = 9
step_idx = tmp_idx = np.arange(3, scratch_n_pred + 1, 3) - 1
print(step_idx)

[2 5 8]


In [29]:
# Positional indices 0 .. len(step_idx) - 1, one per entry of step_idx.
# NOTE(review): the saved output [0] implies step_idx held a single element when
# this cell ran, which does not match the [2 5 8] printed by the cell that
# defines step_idx - re-run the notebook in order.
tmp_idx = np.arange(len(step_idx))
print(tmp_idx)

[0]


In [30]:
# For each index in tmp_idx, read three consecutive values from row 0 of listx
# and print them labelled as MAPE (percent format), MAE and RMSE.
for ix in tmp_idx:
    # Columns [3*ix, 3*ix + 3) of row 0: one triple per time step.
    te = listx[0, ix*3:(ix*3)+3]
    print(f'Time Step {ix + 1}: '
        f'MAPE {te[0]:7.3%}; '
        f'MAE  {te[1]:4.3f}; '
        f'RMSE {te[2]:6.3f}.')


Time Step 1: MAPE 100.000%; MAE  2.000; RMSE  3.000.


In [None]:
    # NOTE: the slice -3:-5 is always empty because its start lies after its
    # stop with the default positive step; hence the [] in the saved output.
    print(listx[0, -3: -5])

[]
