In [1]:
import pandas as pd
import numpy as np

In [2]:
def masked_rmse_np(preds, labels, null_val=np.nan):
    """
    Masked root-mean-squared error.

    Delegates to ``masked_mse_np`` with the same arguments and returns the
    square root of its result.
    :param preds: predicted values.
    :param labels: ground-truth values; entries matching ``null_val`` are masked out.
    :param null_val: marker for missing labels (NaN by default).
    :return: square root of the masked MSE.
    """
    mse = masked_mse_np(preds=preds, labels=labels, null_val=null_val)
    return np.sqrt(mse)

def masked_mse_np(preds, labels, null_val=np.nan):
    """
    Mean squared error over the entries whose label is not ``null_val``.

    The 0/1 validity mask is divided by its own mean, so averaging the weighted
    errors over *all* entries equals averaging over the valid entries only.
    NaN products (e.g. from NaN labels, or from an all-masked input where the
    mask mean is 0) are replaced by 0 before averaging.
    :param preds: predicted values.
    :param labels: ground-truth values, same shape as ``preds``.
    :param null_val: marker for missing labels; NaN means "mask NaN labels".
    :return: the masked MSE as a scalar.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        valid = ~np.isnan(labels) if np.isnan(null_val) else np.not_equal(labels, null_val)
        weights = valid.astype('float32')
        weights /= np.mean(weights)
        squared_err = np.square(np.subtract(preds, labels)).astype('float32')
        weighted = np.nan_to_num(squared_err * weights)
        return np.mean(weighted)

def masked_mae_np(preds, labels, null_val=np.nan):
    """
    Mean absolute error over the entries whose label is not ``null_val``.

    The 0/1 validity mask is divided by its own mean, so averaging the weighted
    errors over all entries equals averaging over the valid entries only.
    NaN products are replaced by 0 before averaging.
    :param preds: predicted values.
    :param labels: ground-truth values, same shape as ``preds``.
    :param null_val: marker for missing labels; NaN means "mask NaN labels".
    :return: the masked MAE as a scalar.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        valid = ~np.isnan(labels) if np.isnan(null_val) else np.not_equal(labels, null_val)
        weights = valid.astype('float32')
        weights /= np.mean(weights)
        abs_err = np.abs(np.subtract(preds, labels)).astype('float32')
        weighted = np.nan_to_num(abs_err * weights)
        return np.mean(weighted)

def masked_mape_np(preds, labels, null_val=np.nan):
    """
    Mean absolute percentage error (as a fraction, not multiplied by 100)
    over the entries whose label is not ``null_val``.

    The 0/1 validity mask is divided by its own mean, so averaging the weighted
    ratios over all entries equals averaging over the valid entries only.
    Division warnings are suppressed and NaN products (e.g. a masked entry
    whose ratio is inf) are replaced by 0 before averaging.
    :param preds: predicted values.
    :param labels: ground-truth values, same shape as ``preds``; also the divisor.
    :param null_val: marker for missing labels; NaN means "mask NaN labels".
    :return: the masked MAPE as a scalar.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        valid = ~np.isnan(labels) if np.isnan(null_val) else np.not_equal(labels, null_val)
        weights = valid.astype('float32')
        weights /= np.mean(weights)
        residual = np.subtract(preds, labels).astype('float32')
        rel_err = np.abs(np.divide(residual, labels))
        weighted = np.nan_to_num(weights * rel_err)
        return np.mean(weighted)

In [3]:
def static_predict(df, n_forward, test_ratio=0.2):
    """
    Persistence (naive) baseline: the value ``n_forward`` rows ahead is
    predicted to equal the current value.

    Assumes $x^{t+n} = x^{t}$ with $n$ = ``n_forward``.
    :param df: pandas DataFrame whose rows are shifted to build the forecast.
    :param n_forward: number of rows the frame is shifted by.
    :param test_ratio: fraction of rows, taken from the end of ``df``, that
        forms the test split.
    :return: ``(y_predict, y_test)``, both restricted to the last
        ``round(len(df) * test_ratio)`` rows of ``df``.
    """
    n_sample = df.shape[0]
    test_num = int(round(n_sample * test_ratio))
    # Slice from an explicit start position instead of ``[-test_num:]``:
    # with test_num == 0 the negative form is ``[-0:]``, i.e. the WHOLE frame
    # rather than an empty test split. Clamp at 0 so an oversized ratio still
    # yields the full frame, as before.
    n_train = max(n_sample - test_num, 0)
    y_test = df.iloc[n_train:]
    y_predict = df.shift(n_forward).iloc[n_train:]
    return y_predict, y_test


In [4]:
# Input CSVs under bay/; the numeric suffix distinguishes the dataset variants
# (presumably the percentage of corrupted/missing readings -- TODO confirm).
path_0, path_5, path_10, path_20 = (
    'bay/speed_bay_0.csv',
    'bay/speed_bay_5.csv',
    'bay/speed_bay_10.csv',
    'bay/speed_bay_20.csv',
)

In [5]:
# File the ground-truth labels (y_test) are read from; it is the same file as path_0.
org_path = 'bay/speed_bay_0.csv'

In [6]:
# Read each CSV (in the same order as before) and drop the 'Unnamed: 0' column
# from every frame.
org_df, df_0, df_5, df_10, df_20 = (
    pd.read_csv(csv_path).drop(columns=['Unnamed: 0'])
    for csv_path in (org_path, path_0, path_5, path_10, path_20)
)

In [7]:
# Hold-out split: the trailing `test_ratio` share of the rows is the test window.
test_ratio = 0.2
n_sample, n_output = org_df.shape
n_test = int(round(test_ratio * n_sample))
n_train = n_sample - n_test
# Ground-truth labels: every row of the original frame from position n_train on.
y_test = org_df.iloc[n_train:]

In [8]:
n_forwards = [3, 6, 9, 12]

# Score the static (persistence) forecast built from df_0 against y_test.
# The shared `test_ratio` is passed through so the prediction window always
# matches the rows selected for y_test; labels equal to 0 are masked out.
labels = y_test.values
for forward in n_forwards:
    y_predict, _ = static_predict(df_0, n_forward=forward, test_ratio=test_ratio)
    preds = y_predict.values
    rmse = masked_rmse_np(preds=preds, labels=labels, null_val=0)
    mape = masked_mape_np(preds=preds, labels=labels, null_val=0)
    mae = masked_mae_np(preds=preds, labels=labels, null_val=0)
    # The horizon is reported as forward * 5 minutes; MAPE is shown in percent.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	1.59154	3.38682	3.23085
HA	30 min	2.17300	4.95251	4.64266
HA	45 min	2.63596	6.07937	5.80040
HA	60 min	3.04716	7.00177	6.83315


In [9]:
n_forwards = [3, 6, 9, 12]

# Score the static (persistence) forecast built from df_5 against y_test.
# The shared `test_ratio` is passed through so the prediction window always
# matches the rows selected for y_test; labels equal to 0 are masked out.
labels = y_test.values
for forward in n_forwards:
    y_predict, _ = static_predict(df_5, n_forward=forward, test_ratio=test_ratio)
    preds = y_predict.values
    rmse = masked_rmse_np(preds=preds, labels=labels, null_val=0)
    mape = masked_mape_np(preds=preds, labels=labels, null_val=0)
    mae = masked_mae_np(preds=preds, labels=labels, null_val=0)
    # The horizon is reported as forward * 5 minutes; MAPE is shown in percent.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	4.64753	14.64935	8.00728
HA	30 min	5.20233	15.06677	9.35308
HA	45 min	5.64420	15.45396	10.45685
HA	60 min	6.03585	15.81995	11.43884


In [10]:
n_forwards = [3, 6, 9, 12]

# Score the static (persistence) forecast built from df_10 against y_test.
# The shared `test_ratio` is passed through so the prediction window always
# matches the rows selected for y_test; labels equal to 0 are masked out.
labels = y_test.values
for forward in n_forwards:
    y_predict, _ = static_predict(df_10, n_forward=forward, test_ratio=test_ratio)
    preds = y_predict.values
    rmse = masked_rmse_np(preds=preds, labels=labels, null_val=0)
    mape = masked_mape_np(preds=preds, labels=labels, null_val=0)
    mae = masked_mae_np(preds=preds, labels=labels, null_val=0)
    # The horizon is reported as forward * 5 minutes; MAPE is shown in percent.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	7.54843	20.03372	12.72533
HA	30 min	8.06659	20.32022	13.97899
HA	45 min	8.48074	20.59047	15.00941
HA	60 min	8.84890	20.84984	15.92812


In [11]:
n_forwards = [3, 6, 9, 12]

# Score the static (persistence) forecast built from df_20 against y_test.
# The shared `test_ratio` is passed through so the prediction window always
# matches the rows selected for y_test; labels equal to 0 are masked out.
labels = y_test.values
for forward in n_forwards:
    y_predict, _ = static_predict(df_20, n_forward=forward, test_ratio=test_ratio)
    preds = y_predict.values
    rmse = masked_rmse_np(preds=preds, labels=labels, null_val=0)
    mape = masked_mape_np(preds=preds, labels=labels, null_val=0)
    mae = masked_mae_np(preds=preds, labels=labels, null_val=0)
    # The horizon is reported as forward * 5 minutes; MAPE is shown in percent.
    line = 'HA\t%d min\t%.5f\t%.5f\t%.5f' % (forward*5, mae, rmse, mape * 100)
    print(line)

HA	15 min	13.87815	28.58653	22.64576
HA	30 min	14.35514	28.77482	23.81063
HA	45 min	14.73492	28.95298	24.76641
HA	60 min	15.07346	29.12509	25.62024
