In [1]:
import pandas as pd
import numpy as np

In [2]:
def masked_rmse_np(preds, labels, null_val=np.nan):
    """
    Masked root-mean-squared error: the square root of ``masked_mse_np``.

    :param preds: predictions, forwarded unchanged to ``masked_mse_np``.
    :param labels: ground truth, forwarded unchanged to ``masked_mse_np``.
    :param null_val: sentinel marking missing labels, forwarded unchanged.
    :return: ``np.sqrt`` of the masked MSE.
    """
    mse = masked_mse_np(preds=preds, labels=labels, null_val=null_val)
    return np.sqrt(mse)

def masked_mse_np(preds, labels, null_val=np.nan):
    """
    Mean squared error computed only over labels that are not missing.

    A label is treated as missing when it is NaN or, for a non-NaN
    ``null_val``, when it equals ``null_val``. The 0/1 mask is divided by its
    own mean so that the final ``np.mean`` over *all* entries equals the
    average over the valid entries only.

    :param preds: array-like predictions, broadcastable against ``labels``.
    :param labels: array-like ground truth.
    :param null_val: sentinel marking missing labels (default: NaN).
    :return: scalar masked MSE; 0 when no label is valid.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        # NaN labels can never be scored, so they are masked out whatever the
        # sentinel is. Previously, with a numeric null_val, a NaN label stayed
        # in the mask and was silently counted as a zero-error sample.
        mask = ~np.isnan(labels)
        if not np.isnan(null_val):
            mask = np.logical_and(mask, np.not_equal(labels, null_val))
        mask = mask.astype('float32')
        # Re-weight valid entries; an all-zero mask yields NaN here, which
        # nan_to_num below turns into 0.
        mask /= np.mean(mask)
        rmse = np.square(np.subtract(preds, labels)).astype('float32')
        # 0 * NaN (masked entries with NaN error) becomes 0 here.
        rmse = np.nan_to_num(rmse * mask)
        return np.mean(rmse)

def masked_mae_np(preds, labels, null_val=np.nan):
    """
    Mean absolute error computed only over labels that are not missing.

    A label is treated as missing when it is NaN or, for a non-NaN
    ``null_val``, when it equals ``null_val``. The 0/1 mask is divided by its
    own mean so that the final ``np.mean`` over *all* entries equals the
    average over the valid entries only.

    :param preds: array-like predictions, broadcastable against ``labels``.
    :param labels: array-like ground truth.
    :param null_val: sentinel marking missing labels (default: NaN).
    :return: scalar masked MAE; 0 when no label is valid.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        # NaN labels can never be scored, so they are masked out whatever the
        # sentinel is. Previously, with a numeric null_val, a NaN label stayed
        # in the mask and was silently counted as a zero-error sample.
        mask = ~np.isnan(labels)
        if not np.isnan(null_val):
            mask = np.logical_and(mask, np.not_equal(labels, null_val))
        mask = mask.astype('float32')
        # Re-weight valid entries; an all-zero mask yields NaN here, which
        # nan_to_num below turns into 0.
        mask /= np.mean(mask)
        mae = np.abs(np.subtract(preds, labels)).astype('float32')
        # 0 * NaN (masked entries with NaN error) becomes 0 here.
        mae = np.nan_to_num(mae * mask)
        return np.mean(mae)

def masked_mape_np(preds, labels, null_val=np.nan):
    """
    Mean absolute percentage error (as a fraction, not multiplied by 100)
    computed only over labels that are usable.

    A label is excluded when it is NaN, when it equals a non-NaN ``null_val``,
    or when it is exactly 0 (the percentage error is undefined there). The
    0/1 mask is divided by its own mean so that the final ``np.mean`` over
    *all* entries equals the average over the usable entries only.

    :param preds: array-like predictions, broadcastable against ``labels``.
    :param labels: array-like ground truth.
    :param null_val: sentinel marking missing labels (default: NaN).
    :return: scalar masked MAPE; 0 when no label is usable.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        # NaN labels are always missing, whatever the sentinel is.
        mask = ~np.isnan(labels)
        if not np.isnan(null_val):
            mask = np.logical_and(mask, np.not_equal(labels, null_val))
        # A zero label makes the ratio +/-inf; if it stayed in the mask,
        # nan_to_num would turn it into a huge finite number and wreck the
        # mean. With null_val=0 this is a no-op, so existing results hold.
        mask = np.logical_and(mask, np.not_equal(labels, 0))
        mask = mask.astype('float32')
        # Re-weight usable entries; an all-zero mask yields NaN here, which
        # nan_to_num below turns into 0.
        mask /= np.mean(mask)
        mape = np.abs(np.divide(np.subtract(preds, labels).astype('float32'), labels))
        # 0 * inf and 0 * NaN (masked entries) become 0 here.
        mape = np.nan_to_num(mask * mape)
        return np.mean(mape)

In [3]:
def static_predict(df, n_forward, test_ratio=0.2):
    """
    Naive persistence baseline: the forecast ``n_forward`` rows ahead is the
    currently observed value, i.e. it assumes $x^{t+n} = x^{t}$.

    :param df: pandas DataFrame whose rows are presumably ordered in time
        (the split takes the trailing rows) -- TODO confirm with the data.
    :param n_forward: number of rows the frame is shifted by (the horizon).
    :param test_ratio: fraction of rows, taken from the end, used for testing.
    :return: ``(y_predict, y_test)``; both cover the last
        ``round(len(df) * test_ratio)`` rows, and ``y_predict`` holds the
        values found ``n_forward`` rows earlier (NaN where the shift runs
        past the start of ``df``).
    """
    n_rows = df.shape[0]
    test_num = int(round(n_rows * test_ratio))
    # Slice from an explicit, non-negative start position. The previous
    # ``df[-test_num:]`` returned the WHOLE frame when test_num was 0,
    # because ``-0`` is just ``0``.
    test_start = max(n_rows - test_num, 0)
    y_test = df.iloc[test_start:]
    y_predict = df.shift(n_forward).iloc[test_start:]
    return y_predict, y_test


In [4]:
# Input CSVs, one per dataset variant. The numeric suffix (0/5/10/20)
# presumably encodes a corruption / missing-data level -- TODO confirm.
path_0, path_5, path_10, path_20 = (
    'gangnam/speed_gangnam_0.csv',
    'gangnam/speed_gangnam_5.csv',
    'gangnam/speed_gangnam_10.csv',
    'gangnam/speed_gangnam_20.csv',
)

In [5]:
# Reference file for evaluation: it is loaded as org_df, from which the
# y_test labels are sliced. Note this is the same file as path_0.
org_path = 'gangnam/speed_gangnam_0.csv'

In [6]:
# Load every variant the same way: read the CSV and discard the
# 'Unnamed: 0' column (presumably the index column written by a previous
# DataFrame.to_csv call -- TODO confirm).
org_df, df_0, df_5, df_10, df_20 = (
    pd.read_csv(csv_path).drop(columns=['Unnamed: 0'])
    for csv_path in (org_path, path_0, path_5, path_10, path_20)
)

In [7]:
# Hold-out split on the reference frame: the trailing `test_ratio` share of
# the rows becomes the label set every variant is scored against.
test_ratio = 0.2
n_sample, n_output = org_df.shape
n_train = n_sample - int(round(n_sample * test_ratio))
n_test = n_sample - n_train
# Positional slice: everything from row n_train to the end.
y_test = org_df.iloc[n_train:]

In [8]:
# Horizons in rows; they are reported as forward*5 minutes below, i.e. the
# data is presumably sampled every 5 minutes -- TODO confirm.
n_forwards = [3, 6, 9, 12]

# Score the static (last-value) baseline built from df_0 against y_test.
# NOTE(review): the 'HA' tag is kept byte-for-byte to match the recorded
# output, although the predictor here is static_predict -- confirm the label.
for forward in n_forwards:
    # Use the notebook-level test_ratio that y_test was split with, so that
    # predictions and labels always cover the same rows.
    y_predict, _ = static_predict(df_0, n_forward=forward, test_ratio=test_ratio)
    rmse = masked_rmse_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mape = masked_mape_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mae = masked_mae_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    # Columns: tag, horizon in minutes, MAE, RMSE, MAPE in percent.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	3.77962	5.85454	15.74916
HA	30 min	4.13101	6.25154	17.37849
HA	45 min	4.34551	6.45366	18.27880
HA	60 min	4.49262	6.59327	18.92965


In [9]:
# Horizons in rows; they are reported as forward*5 minutes below, i.e. the
# data is presumably sampled every 5 minutes -- TODO confirm.
n_forwards = [3, 6, 9, 12]

# Score the static (last-value) baseline built from df_5 against y_test.
# NOTE(review): the 'HA' tag is kept byte-for-byte to match the recorded
# output, although the predictor here is static_predict -- confirm the label.
for forward in n_forwards:
    # Use the notebook-level test_ratio that y_test was split with, so that
    # predictions and labels always cover the same rows.
    y_predict, _ = static_predict(df_5, n_forward=forward, test_ratio=test_ratio)
    rmse = masked_rmse_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mape = masked_mape_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mae = masked_mae_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    # Columns: tag, horizon in minutes, MAE, RMSE, MAPE in percent.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	4.88590	8.32188	19.93153
HA	30 min	5.22272	8.59714	21.49657
HA	45 min	5.42533	8.73793	22.34043
HA	60 min	5.56684	8.83760	22.97306


In [10]:
# Horizons in rows; they are reported as forward*5 minutes below, i.e. the
# data is presumably sampled every 5 minutes -- TODO confirm.
n_forwards = [3, 6, 9, 12]

# Score the static (last-value) baseline built from df_10 against y_test.
# NOTE(review): the 'HA' tag is kept byte-for-byte to match the recorded
# output, although the predictor here is static_predict -- confirm the label.
for forward in n_forwards:
    # Use the notebook-level test_ratio that y_test was split with, so that
    # predictions and labels always cover the same rows.
    y_predict, _ = static_predict(df_10, n_forward=forward, test_ratio=test_ratio)
    rmse = masked_rmse_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mape = masked_mape_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mae = masked_mae_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    # Columns: tag, horizon in minutes, MAE, RMSE, MAPE in percent.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	6.28731	11.29763	24.11488
HA	30 min	6.59541	11.47700	25.54941
HA	45 min	6.78988	11.57815	26.36482
HA	60 min	6.91989	11.64571	26.94659


In [11]:
# Horizons in rows; they are reported as forward*5 minutes below, i.e. the
# data is presumably sampled every 5 minutes -- TODO confirm.
n_forwards = [3, 6, 9, 12]

# Score the static (last-value) baseline built from df_20 against y_test.
# NOTE(review): the 'HA' tag is kept byte-for-byte to match the recorded
# output, although the predictor here is static_predict -- confirm the label.
for forward in n_forwards:
    # Use the notebook-level test_ratio that y_test was split with, so that
    # predictions and labels always cover the same rows.
    y_predict, _ = static_predict(df_20, n_forward=forward, test_ratio=test_ratio)
    rmse = masked_rmse_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mape = masked_mape_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mae = masked_mae_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    # Columns: tag, horizon in minutes, MAE, RMSE, MAPE in percent.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	8.65663	14.27565	32.75874
HA	30 min	8.95196	14.42036	34.13178
HA	45 min	9.12217	14.49195	34.85474
HA	60 min	9.24053	14.54364	35.39761
