In [1]:
import numpy as np
import argparse
from UCTB.dataset import NodeTrafficLoader
from UCTB.model import XGBoost
from UCTB.evaluation import metric
from UCTB.preprocess import SplitData
from UCTB.preprocess.time_utils import is_work_day_china, is_work_day_america
import nni
import os
from tqdm import tqdm


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [30]:
#use params and args to show its difference
args = {
    "dataset":'Metro',
    "city":"Shanghai",
    "MergeIndex":1,
    "data_range":"all",
    "train_data_length":"all",
    "MergeWay":"sum",
    "test_ratio":0.1,
    "closeness_len":12,
    "period_len":6,
    "trend_len":4,
    "external_use":"weather-holiday-tp", # "weather-holiday-tp"
    "normalize":False,
    "max_depth":8,
    "num_boost_round":51,
    "poi_distance":1000
}


In [31]:
data_loader = NodeTrafficLoader(dataset=args['dataset'], city=args['city'],
                                data_range=args['data_range'], train_data_length=args['train_data_length'],
                                test_ratio=args['test_ratio'],
                                closeness_len=args['closeness_len'],
                                period_len=args['period_len'],
                                trend_len=args['trend_len'],
                                normalize=False,
                                workday_parser=is_work_day_america if args['dataset'] == 'Bike' else is_work_day_china,
                                external_use=args['external_use'],
                                poi_distance=args['poi_distance'],
                                MergeIndex=args['MergeIndex'],
                                MergeWay="max" if args["dataset"] == "ChargeStation" else "sum")

Traffic shape is: (1368, 288)
Weather shape is: (1368, 30)
upSample weather feature
**** Using Weather feature ****
weather feature: (1368, 30)
**** Only use Metro service time and Fitness should be 60mins *****
**** Using holiday feature ****
holiday feature: (1368, 2)
**** Using temporal position feature ****
hour of day feature: (1368, 24)
day of week feature: (1368, 7)


In [32]:
data_loader.poi_dim

In [33]:
train_closeness, val_closeness = SplitData.split_data(
    data_loader.train_closeness, [0.9, 0.1])
train_period, val_period = SplitData.split_data(
    data_loader.train_period, [0.9, 0.1])
train_trend, val_trend = SplitData.split_data(
    data_loader.train_trend, [0.9, 0.1])
if data_loader.external_dim > 0:
    train_ef, val_ef = SplitData.split_data(data_loader.train_ef, [0.9, 0.1])
if data_loader.poi_dim is not None and data_loader.poi_dim > 0:
    train_poi,val_poi = SplitData.split_data(data_loader.poi_feature_train, [0.9, 0.1])
    
train_y, val_y = SplitData.split_data(data_loader.train_y, [0.9, 0.1])

prediction_test = []
prediction_val = []

In [34]:
for i in tqdm(range(data_loader.station_number),desc="training model..."):
#for i in range(data_loader.station_number):
    #print('Station', i)

    model = XGBoost(n_estimators=int(args['num_boost_round']), max_depth=int(args['max_depth']),verbosity=0)

    X_Train = []
    X_Val = []
    X_Test = []
    if int(args['closeness_len']) > 0:
        X_Train.append(train_closeness[:, i, :, 0])
        X_Val.append(val_closeness[:, i, :, 0])
        X_Test.append(data_loader.test_closeness[:, i, :, 0])
    if int(args['period_len']) > 0:
        X_Train.append(train_period[:, i, :, 0])
        X_Val.append(val_period[:, i, :, 0])
        X_Test.append(data_loader.test_period[:, i, :, 0])
    if int(args['trend_len']) > 0:
        X_Train.append(train_trend[:, i, :, 0])
        X_Val.append(val_trend[:, i, :, 0])
        X_Test.append(data_loader.test_trend[:, i, :, 0])
    
    # append external features
    if data_loader.external_dim > 0:
        X_Train.append(train_ef)
        X_Val.append(val_ef)
        X_Test.append(data_loader.test_ef)
    
    # append poi features
    if data_loader.poi_dim is not None and data_loader.poi_dim > 0:
        X_Train.append(train_poi[:,i,:])
        X_Val.append(val_poi[:,i,:])
        X_Test.append(data_loader.poi_feature_test[:,i,:])
    
    X_Train = np.concatenate(X_Train, axis=-1)
    X_Val = np.concatenate(X_Val, axis=-1)
    X_Test = np.concatenate(X_Test, axis=-1)

    model.fit(X_Train, train_y[:, i, 0])

    p_val = model.predict(X_Val)
    p_test = model.predict(X_Test)
    
    prediction_val.append(p_val.reshape([-1, 1, 1]))
    prediction_test.append(p_test.reshape([-1, 1, 1]))

training model...: 100%|█████████████████████████████████████████████████████████████| 288/288 [00:53<00:00,  5.43it/s]


In [35]:
p_test.reshape([-1, 1, 1]).shape

(137, 1, 1)

In [36]:
np.concatenate(prediction_val, axis=-2).shape

(73, 288, 1)

In [40]:
prediction_test = np.concatenate(prediction_test, axis=-2)
prediction_val = np.concatenate(prediction_val, axis=-2)

print('Val RMSE', metric.rmse(prediction_val, val_y, threshold=0))
print('Test RMSE', metric.rmse(prediction_test, data_loader.test_y, threshold=0))



ValueError: operands could not be broadcast together with shapes (21024,1) (73,288,1) 

In [39]:
X_Train.shape

(654, 85)

In [36]:
train_ef.shape

(561, 63)