In [1]:
import math
import time
import pandas as pd
import numpy as np
import numpy.linalg as la

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputRegressor

In [2]:
def load_data(path):
    df = pd.read_csv(path)
    data = df.drop(columns=['Unnamed: 0'])
    
    return data

In [3]:
def preprocess_data(data, time_len, rate, seq_len, pre_len):
    train_size = int(time_len * rate)
    train_data = data[0:train_size]
    test_data = data[train_size:time_len]
    
    trainX, trainY, testX, testY = [], [], [], []
    for i in range(len(train_data) - seq_len - pre_len):
        a = train_data[i: i + seq_len + pre_len]
        trainX.append(a[0 : seq_len])
        trainY.append(a[seq_len : seq_len + pre_len])
    for i in range(len(test_data) - seq_len -pre_len):
        b = test_data[i: i + seq_len + pre_len]
        testX.append(b[0 : seq_len])
        testY.append(b[seq_len : seq_len + pre_len])
        
    return trainX, trainY, testX, testY

In [4]:
def getTestY(data):
    tmp_scaler = StandardScaler()
    tmp_data = tmp_scaler.fit_transform(data)
    tmp_data = tmp_scaler.inverse_transform(tmp_data)
    _, _, _, testY = preprocess_data(tmp_data, data.shape[0], train_rate, seq_len, pre_len)
    testY = np.array(testY)
    testY = np.reshape(testY, [-1, pre_len])
    
    return testY

In [5]:
 def evaluation(a,b):
    rmse = math.sqrt(mean_squared_error(a,b))
    mae = mean_absolute_error(a, b)
    mape = mean_absolute_percentage_error(a, b)
    F_norm = la.norm(a-b)/la.norm(a)
    
    return rmse, mae, mape, 1-F_norm

In [6]:
def predictSVR(data, testY):
    scaler = StandardScaler()
    data = scaler.fit_transform(data)
    
    rmses, maes, mapes, accs = [], [], [], []
    for i in range(data.shape[1]):
        print('Node', i)
        start = time.time()
        a = data[:, i]
        aX, aY, tX, tY = preprocess_data(a, data.shape[0], train_rate, seq_len, pre_len)
        
        aX = np.array(aX)
        aX = np.reshape(aX, [-1, seq_len])
        aY = np.array(aY)
        aY = np.reshape(aY, [-1, pre_len])

        tX = np.array(tX)
        tX = np.reshape(tX, [-1, seq_len])
        tY = np.array(tY)
        tY = np.reshape(tY, [-1, pre_len])
        
        print('(', aX.shape, aY.shape, tX.shape, tY.shape, round(aX.mean(),3), ')')
        reg = MultiOutputRegressor(SVR(kernel='linear'))
        reg.fit(aX, aY)
        pred = reg.predict(tX)
        
        mean = scaler.mean_[i]
        std = np.sqrt(scaler.var_[i])
        pred = pred*std + mean
        tY = tY*std + mean
        if i==data.shape[1]-1: tY = testY[tY.shape[0]*i:]
        else: tY = testY[tY.shape[0] * i:tY.shape[0] * (i+1)]
        
        rmse, mae, mape, acc = evaluation(tY, pred)
        rmses.append(rmse)
        maes.append(mae)
        mapes.append(mape)
        accs.append(acc)
        
        print('(', rmse, mae, mape, acc, time.time() - start)
    
    print('RMSE: ' + str(np.mean(rmses)) + ', MAE: ' + str(np.mean(maes)) + ', MAPE: ' + str(np.mean(mapes)) + ', ACC: ' + str(np.mean(accs)))

In [7]:
train_rate = 0.8
seq_len = 12
pre_len = 9

In [8]:
path_0 = 'data/METR-LA/speed_la_0.csv'
path_5 = 'data/METR-LA/speed_la_5.csv'
path_10 = 'data/METR-LA/speed_la_10.csv'
path_20 = 'data/METR-LA/speed_la_20.csv'

In [9]:
masterData = load_data(path_0)
print(masterData.shape, (masterData == 0).sum().sum())
testY = getTestY(masterData)
testY.shape, (testY == 0).sum().sum()

(34272, 207) 0


((1414638, 9), 0)

### Unobserved Node = 0%

In [10]:
data = load_data(path_0)
print(data.shape, (data == 0).sum().sum())
predictSVR(data, testY)

(34272, 207) 0
Node 0
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.011 )
( 22.28368250403314 14.597110158816143 0.5820142233825448 0.6054693616726359 379.31836891174316
Node 1
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.008 )
( 16.976743086936295 10.079912378170588 0.391276893227374 0.7062362346719975 741.9941573143005
Node 2
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.016 )
( 13.930583439245247 8.960767607290757 0.20096199576260568 0.7704828902354471 329.6384320259094
Node 3
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.012 )
( 17.547791931647417 13.748898794197594 0.39897042227071194 0.6882157355581615 665.0823822021484
Node 4
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.037 )
( 24.444096381386707 19.914675891741954 0.6422564165843314 0.545333825228411 435.5925943851471
Node 5
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.037 )
( 14.634971978299781 11.816424626462656 0.19567865258076617 0.7690577768958944 746.135585308075
Node 6
( (27396, 12) (27396, 9) (6834, 12)

( 18.522929993572085 12.217791425545643 0.19501210351997997 0.7143796277037195 326.78624725341797
Node 53
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.018 )
( 12.576385159140887 11.19469507738926 0.1877628001026483 0.8079767177179606 885.9492084980011
Node 54
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.017 )
( 14.157394020055285 9.480528661543888 0.17175080771497436 0.7796842830154891 511.81879925727844
Node 55
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.0 )
( 11.470560662888692 7.763380463281269 0.18940205247191894 0.8163038639423142 843.8786659240723
Node 56
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.156 )
( 23.663383215805354 18.346713404086863 0.3142957119493021 0.6272470831779207 610.9351406097412
Node 57
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.005 )
( 14.48683822549872 10.655196918316868 0.18303423115527803 0.7733983618042352 405.4897964000702
Node 58
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.011 )
( 9.48487355625632 6.587551051833227 0.13874788617

( 12.101821694788232 7.895754339975558 0.1579025908968948 0.808462030630687 452.95563673973083
Node 105
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.005 )
( 9.046783269572192 5.437970767332067 0.11276339327850717 0.8581149912768165 684.8699650764465
Node 106
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.002 )
( 14.692032695575657 8.38214471947086 0.2508725045574898 0.7576679788738753 648.6336376667023
Node 107
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.011 )
( 24.298076523466158 16.28445324016339 0.6704331808800041 0.5594507785146222 719.3897731304169
Node 108
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.006 )
( 23.51268457150478 15.804213373584288 0.6095413674743098 0.5772865957309489 546.7742795944214
Node 109
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.017 )
( 15.43363618234204 11.269660416125822 0.32490737612712045 0.7418868480678147 753.2626569271088
Node 110
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.012 )
( 13.169901452244986 7.380210371490102 0.21783592

( 9.424321891905509 7.231261084898319 0.1456404863346557 0.8435251910484185 677.9507923126221
Node 157
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.016 )
( 17.26662845387683 10.532393341368165 0.25641968393708764 0.7158464341284277 374.86922693252563
Node 158
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.004 )
( 18.378403805642304 12.999389357283409 0.28694132474112466 0.6905880532828106 530.6573884487152
Node 159
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.036 )
( 25.732761842119885 19.148267156176843 0.4614890203295633 0.5445580234011514 270.2201614379883
Node 160
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.036 )
( 29.74067535251644 23.451907514229344 0.7566809644559718 0.410817526223474 260.08924555778503
Node 161
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.001 )
( 25.106600888127335 17.657841069326473 0.777875019569251 0.5295986871056735 384.3713381290436
Node 162
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.016 )
( 18.007521232300824 10.84384621739975 0.283238

### Unobserved Node = 5%

In [11]:
data = load_data(path_5)
print(data.shape, (data == 0).sum().sum())
predictSVR(data, testY)

(34272, 207) 342720
Node 0
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.011 )
( 22.28368250403314 14.597110158816143 0.5820142233825448 0.6054693616726359 369.8063974380493
Node 1
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.008 )
( 16.976743086936295 10.079912378170588 0.391276893227374 0.7062362346719975 730.7359662055969
Node 2
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.016 )
( 13.930583439245247 8.960767607290757 0.20096199576260568 0.7704828902354471 322.1611578464508
Node 3
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.012 )
( 17.547791931647417 13.748898794197594 0.39897042227071194 0.6882157355581615 652.4403150081635
Node 4
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.037 )
( 24.444096381386707 19.914675891741954 0.6422564165843314 0.545333825228411 428.5847113132477
Node 5
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.037 )
( 14.634971978299781 11.816424626462656 0.19567865258076617 0.7690577768958944 730.3330936431885
Node 6
( (27396, 12) (27396, 9) (6834

( 18.522929993572085 12.217791425545643 0.19501210351997997 0.7143796277037195 311.0361750125885
Node 53
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.018 )
( 12.576385159140887 11.19469507738926 0.1877628001026483 0.8079767177179606 845.9940657615662
Node 54
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.017 )
( 14.157394020055285 9.480528661543888 0.17175080771497436 0.7796842830154891 491.275022983551
Node 55
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.0 )
( 11.470560662888692 7.763380463281269 0.18940205247191894 0.8163038639423142 812.146345615387
Node 56
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.156 )
( 23.663383215805354 18.346713404086863 0.3142957119493021 0.6272470831779207 578.6756279468536
Node 57
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.005 )
( 14.48683822549872 10.655196918316868 0.18303423115527803 0.7733983618042352 382.98951625823975
Node 58
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.011 )
( 9.48487355625632 6.587551051833227 0.13874788617937

( 9.046783269572192 5.437970767332067 0.11276339327850717 0.8581149912768165 648.6798079013824
Node 106
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.002 )
( 14.692032695575657 8.38214471947086 0.2508725045574898 0.7576679788738753 615.1808996200562
Node 107
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.011 )
( 24.298076523466158 16.28445324016339 0.6704331808800041 0.5594507785146222 684.123601436615
Node 108
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.006 )
( 23.51268457150478 15.804213373584288 0.6095413674743098 0.5772865957309489 519.5531160831451
Node 109
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.017 )
( 15.43363618234204 11.269660416125822 0.32490737612712045 0.7418868480678147 716.0696167945862
Node 110
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) -0.012 )
( 13.169901452244986 7.380210371490102 0.21783592307917307 0.7887289790779128 564.3750998973846
Node 111
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.004 )
( 17.139263381986375 9.914540544534326 0.25260631

( 25.732761842119885 19.148267156176843 0.4614890203295633 0.5445580234011514 255.6069462299347
Node 160
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.036 )
( 29.74067535251644 23.451907514229344 0.7566809644559718 0.410817526223474 244.6233355998993
Node 161
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.001 )
( 25.106600888127335 17.657841069326473 0.777875019569251 0.5295986871056735 363.14789724349976
Node 162
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.016 )
( 18.007521232300824 10.84384621739975 0.2832385965550303 0.7054273697610222 277.0066747665405
Node 163
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.036 )
( 29.60874887529984 21.84200039310556 0.42142636896838986 0.5198671658917492 183.87654757499695
Node 164
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.008 )
( 16.73200952245106 11.184234177225767 0.2107396691928787 0.7307244928100898 403.2488520145416
Node 165
( (27396, 12) (27396, 9) (6834, 12) (6834, 9) 0.018 )
( 19.652162432503086 14.093595070502593 0.25382844