In [1]:
import pandas as pd
import numpy as np

In [2]:
def masked_rmse_np(preds, labels, null_val=np.nan):
    """Root mean squared error with missing labels masked out.

    Thin wrapper: delegates the masking and averaging to ``masked_mse_np``
    and returns the square root of its result.

    :param preds: predicted values.
    :param labels: ground-truth values, same shape as ``preds``.
    :param null_val: label value that marks a missing entry (NaN by default).
    :return: square root of the masked mean squared error.
    """
    mse = masked_mse_np(preds=preds, labels=labels, null_val=null_val)
    return np.sqrt(mse)

def masked_mse_np(preds, labels, null_val=np.nan):
    """Mean squared error over the entries whose label is not missing.

    A label is treated as missing when it is NaN or, if ``null_val`` is not
    NaN, when it equals ``null_val``. Missing entries contribute neither to
    the error sum nor to the number of averaged entries.

    :param preds: predicted values (array-like).
    :param labels: ground-truth values (array-like), same shape as ``preds``.
    :param null_val: sentinel label value that marks a missing entry.
    :return: masked mean squared error as a float32 scalar; 0.0 when every
        label is missing.
    """
    preds = np.asarray(preds)
    labels = np.asarray(labels)
    with np.errstate(divide='ignore', invalid='ignore'):
        # NaN labels are always excluded. Otherwise, with a non-NaN sentinel,
        # they would count as valid entries while their (NaN) error is zeroed
        # by nan_to_num below, silently dragging the mean towards zero.
        mask = ~np.isnan(labels)
        if not np.isnan(null_val):
            mask = mask & np.not_equal(labels, null_val)
        mask = mask.astype('float32')
        # Rescale so that the plain mean below equals sum(err) / n_valid.
        # With no valid entry this is 0/0 -> NaN, which nan_to_num maps to 0.
        mask /= np.mean(mask)
        sq_err = np.square(np.subtract(preds, labels)).astype('float32')
        sq_err = np.nan_to_num(sq_err * mask)
        return np.mean(sq_err)

def masked_mae_np(preds, labels, null_val=np.nan):
    """Mean absolute error over the entries whose label is not missing.

    A label is treated as missing when it is NaN or, if ``null_val`` is not
    NaN, when it equals ``null_val``. Missing entries contribute neither to
    the error sum nor to the number of averaged entries.

    :param preds: predicted values (array-like).
    :param labels: ground-truth values (array-like), same shape as ``preds``.
    :param null_val: sentinel label value that marks a missing entry.
    :return: masked mean absolute error as a float32 scalar; 0.0 when every
        label is missing.
    """
    preds = np.asarray(preds)
    labels = np.asarray(labels)
    with np.errstate(divide='ignore', invalid='ignore'):
        # NaN labels are always excluded. Otherwise, with a non-NaN sentinel,
        # they would count as valid entries while their (NaN) error is zeroed
        # by nan_to_num below, silently dragging the mean towards zero.
        mask = ~np.isnan(labels)
        if not np.isnan(null_val):
            mask = mask & np.not_equal(labels, null_val)
        mask = mask.astype('float32')
        # Rescale so that the plain mean below equals sum(err) / n_valid.
        # With no valid entry this is 0/0 -> NaN, which nan_to_num maps to 0.
        mask /= np.mean(mask)
        abs_err = np.abs(np.subtract(preds, labels)).astype('float32')
        abs_err = np.nan_to_num(abs_err * mask)
        return np.mean(abs_err)

def masked_mape_np(preds, labels, null_val=np.nan):
    """Mean absolute percentage error (as a fraction) over usable labels.

    A label is skipped when it is NaN, when it equals ``null_val`` (if
    ``null_val`` is not NaN), or when it is zero, since the relative error
    ``|pred - label| / label`` is undefined for a zero label. Skipped entries
    contribute neither to the error sum nor to the number of averaged entries.

    :param preds: predicted values (array-like).
    :param labels: ground-truth values (array-like), same shape as ``preds``.
    :param null_val: sentinel label value that marks a missing entry.
    :return: masked mean of ``|preds - labels| / |labels|`` (multiply by 100
        for percent); 0.0 when no label is usable.
    """
    preds = np.asarray(preds)
    labels = np.asarray(labels)
    with np.errstate(divide='ignore', invalid='ignore'):
        # NaN labels are always excluded. Otherwise, with a non-NaN sentinel,
        # they would count as valid entries while their (NaN) error is zeroed
        # by nan_to_num below, silently dragging the mean towards zero.
        mask = ~np.isnan(labels)
        if not np.isnan(null_val):
            mask = mask & np.not_equal(labels, null_val)
        # A zero label yields a division by zero; nan_to_num would turn the
        # resulting inf into a huge finite number and wreck the mean.
        mask = mask & np.not_equal(labels, 0)
        mask = mask.astype('float32')
        # Rescale so that the plain mean below equals sum(err) / n_valid.
        # With no valid entry this is 0/0 -> NaN, which nan_to_num maps to 0.
        mask /= np.mean(mask)
        rel_err = np.abs(np.divide(np.subtract(preds, labels).astype('float32'), labels))
        # Masked-out entries may hold inf/NaN; 0 * inf is NaN, mapped to 0 here.
        rel_err = np.nan_to_num(mask * rel_err)
        return np.mean(rel_err)

In [3]:
def static_predict(df, n_forward, test_ratio=0.2):
    """
    Persistence baseline: each row is predicted to equal the row observed
    ``n_forward`` rows earlier, i.e. assumes $x^{t+n} = x^{t}$ with
    $n$ = ``n_forward``.

    :param df: pandas object exposing ``shape``, ``shift`` and ``iloc``;
        rows are shifted positionally.
    :param n_forward: number of rows to shift the data forward by.
    :param test_ratio: fraction of the rows (taken from the end) that forms
        the test set.
    :return: ``(y_predict, y_test)`` - the shifted rows and the original rows
        of the test window, both with the test window's index.
    """
    n_rows = df.shape[0]
    test_num = int(round(n_rows * test_ratio))
    # Keep the window inside the frame.
    test_num = min(max(test_num, 0), n_rows)
    # Slice from an explicit start position: a ``-test_num`` start would
    # select the WHOLE frame when test_num is 0, because -0 == 0.
    test_start = n_rows - test_num
    y_test = df.iloc[test_start:]
    y_predict = df.shift(n_forward).iloc[test_start:]
    return y_predict, y_test


In [4]:
# Input CSVs, one per variant of the speed data set.
# NOTE(review): the numeric suffix is presumably the percentage of
# missing/corrupted readings in that variant - confirm against the data.
path_0, path_5, path_10, path_20 = (
    'la/speed_la_0.csv',
    'la/speed_la_5.csv',
    'la/speed_la_10.csv',
    'la/speed_la_20.csv',
)

In [5]:
# File behind org_df, the frame the evaluation labels (y_test) are cut from.
# It is the same file as path_0.
org_path = 'la/speed_la_0.csv'

In [6]:
# Load the reference frame and the four variants in one pass. Every file
# carries an 'Unnamed: 0' column that is dropped right after reading.
# NOTE(review): presumably the index written by DataFrame.to_csv - confirm.
org_df, df_0, df_5, df_10, df_20 = (
    pd.read_csv(csv_path).drop(columns=['Unnamed: 0'])
    for csv_path in (org_path, path_0, path_5, path_10, path_20)
)

In [7]:
# Hold out the trailing `test_ratio` share of the reference rows as labels.
test_ratio = 0.2
n_sample, n_output = org_df.shape
n_test = int(round(test_ratio * n_sample))
n_train = n_sample - n_test
# Positional slice: everything after the first n_train rows.
y_test = org_df.iloc[n_train:]

In [8]:
# Forecast horizons, expressed in rows of the data frame.
n_forwards = [3, 6, 9, 12]

# Score the static (shifted-copy) prediction built from df_0 against y_test.
# NOTE(review): rows are labelled 'HA' although static_predict is a
# last-value baseline - confirm the label is intended.
for forward in n_forwards:
    # Reuse the notebook-level test_ratio so the prediction window always
    # lines up with y_test, which was cut with the same ratio.
    y_predict, _ = static_predict(df_0, n_forward=forward, test_ratio=test_ratio)
    rmse = masked_rmse_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mape = masked_mape_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mae = masked_mae_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    # forward*5 turns rows into minutes - assumes 5-minute sampling, TODO confirm.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	3.10622	6.05488	7.66188
HA	30 min	3.76794	7.63339	9.77326
HA	45 min	4.34149	8.83423	11.60009
HA	60 min	4.86868	9.84163	13.25619


In [9]:
# Forecast horizons, expressed in rows of the data frame.
n_forwards = [3, 6, 9, 12]

# Score the static (shifted-copy) prediction built from df_5 against y_test.
# NOTE(review): rows are labelled 'HA' although static_predict is a
# last-value baseline - confirm the label is intended.
for forward in n_forwards:
    # Reuse the notebook-level test_ratio so the prediction window always
    # lines up with y_test, which was cut with the same ratio.
    y_predict, _ = static_predict(df_5, n_forward=forward, test_ratio=test_ratio)
    rmse = masked_rmse_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mape = masked_mape_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mae = masked_mae_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    # forward*5 turns rows into minutes - assumes 5-minute sampling, TODO confirm.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	5.61398	13.87321	12.06914
HA	30 min	6.23510	14.58824	14.05712
HA	45 min	6.77180	15.21145	15.77417
HA	60 min	7.26741	15.78375	17.33893


In [10]:
# Forecast horizons, expressed in rows of the data frame.
n_forwards = [3, 6, 9, 12]

# Score the static (shifted-copy) prediction built from df_10 against y_test.
# NOTE(review): rows are labelled 'HA' although static_predict is a
# last-value baseline - confirm the label is intended.
for forward in n_forwards:
    # Reuse the notebook-level test_ratio so the prediction window always
    # lines up with y_test, which was cut with the same ratio.
    y_predict, _ = static_predict(df_10, n_forward=forward, test_ratio=test_ratio)
    rmse = masked_rmse_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mape = masked_mape_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mae = masked_mae_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    # forward*5 turns rows into minutes - assumes 5-minute sampling, TODO confirm.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	8.30479	19.12186	16.57362
HA	30 min	8.90310	19.62891	18.48027
HA	45 min	9.42124	20.08108	20.12545
HA	60 min	9.89893	20.50250	21.61931


In [11]:
# Forecast horizons, expressed in rows of the data frame.
n_forwards = [3, 6, 9, 12]

# Score the static (shifted-copy) prediction built from df_20 against y_test.
# NOTE(review): rows are labelled 'HA' although static_predict is a
# last-value baseline - confirm the label is intended.
for forward in n_forwards:
    # Reuse the notebook-level test_ratio so the prediction window always
    # lines up with y_test, which was cut with the same ratio.
    y_predict, _ = static_predict(df_20, n_forward=forward, test_ratio=test_ratio)
    rmse = masked_rmse_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mape = masked_mape_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mae = masked_mae_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    # forward*5 turns rows into minutes - assumes 5-minute sampling, TODO confirm.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	14.15848	27.48197	25.91892
HA	30 min	14.68089	27.78324	27.56519
HA	45 min	15.13397	28.05620	28.98938
HA	60 min	15.54981	28.31349	30.27677
