In [None]:
import argparse
import pickle

import numpy as np; np.seterr(invalid='ignore')
import pandas as pd

In [None]:
# Run configuration. Kept as a plain dict and mirrored onto `args` so the rest
# of the notebook can use attribute access (args.offset, args.windows, ...).
parser = dict(
    # train keeps columns [0, offset]; the test window starts at offset + 1
    offset=803,
    # number of columns taken after `offset` for the test frame
    val_len=64,
    # used to name the intermediate output directory
    seed=20170913,
    # trailing window lengths (in columns) for the per-window medians
    windows=[7, 14, 21, 35, 56, 91, 147, 238, 385, 623],
    # inclusive date range for which zero-filled columns are appended
    forecast_start='2017-09-11',
    forecast_end='2017-11-13',
)
args = argparse.Namespace(**parser)

In [None]:
def smape(y_pred, y_true):
    """Symmetric mean absolute percentage error on a 0-200 scale.

    Predictions are rounded to the nearest integer before scoring. Positions
    where ``y_true + y_pred`` is exactly zero contribute 0 to the mean, and
    NaN terms (for example a missing ``y_true``) are left out of the mean.

    Args:
        y_pred: predicted values; a scalar or any array-like.
        y_true: observed values, broadcastable against ``y_pred``.

    Returns:
        ``200 * nanmean(|y_true - y_pred| / (y_true + y_pred))`` as a float.
    """
    y_pred = np.around(np.asarray(y_pred, dtype=float))
    y_true = np.asarray(y_true, dtype=float)
    denominator = y_true + y_pred
    # Silence the division warnings locally instead of depending on a
    # process-wide np.seterr() call made elsewhere.
    with np.errstate(divide='ignore', invalid='ignore'):
        diff = np.abs(y_true - y_pred) / denominator
    # np.where (rather than in-place item assignment) also works for 0-d input.
    diff = np.where(denominator == 0, 0.0, diff)
    return 200 * np.nanmean(diff)

In [None]:
print("Getting data...")
full = pd.read_csv('../data/wttsf/train_2.csv')
# Fill gaps along each row (axis=1): carry the last seen value forward, then
# back-fill whatever is still missing at the start of the row. Rows with no
# values at all stay NaN. .ffill()/.bfill() are used because
# fillna(method=...) is deprecated in recent pandas releases.
# Column 0 is skipped; the later melt calls use 'Page' as the id column.
full.iloc[:, 1:] = full.iloc[:, 1:].ffill(axis=1).bfill(axis=1)
datetime_list = pd.date_range(args.forecast_start, args.forecast_end)
# Append one zero-filled column per forecast day, labelled YYYY-MM-DD.
# The loop variable is not called `datetime` so it cannot shadow the stdlib
# module of that name.
for day in datetime_list:
    full[day.date().isoformat()] = 0

print("Constructing test set...")
# The test window is the `val_len` columns that follow positional index
# `offset`; reshape them to long format with one row per (Page, Date).
first_col = args.offset + 1
horizon_cols = full.columns[first_col:first_col + args.val_len].tolist()
test = full[horizon_cols + ['Page']].melt(
    id_vars='Page', var_name='Date', value_name="Visits")
test['Date'] = test['Date'].astype('datetime64[ns]')
# dayofweek is 0 for Monday, so values 5 and 6 are Saturday and Sunday.
test['Weekend'] = test['Date'].dt.dayofweek.ge(5)

print("Constructing train set...")
# History used for the medians: columns 0..offset (inclusive) of `full`.
train = full.iloc[:, :args.offset+1]

print("Getting medians...")
# Per-day columns only. Assumes 'Page' is column 0, as the iloc[:, 1:] fill in
# the load step does. Slicing windows from this index means an oversized
# window can never pull 'Page' in a second time.
day_columns = train.columns[1:]
for i in args.windows:
    print(i, end=' ')
    val = 'MW'+str(i)
    # Long format of the last `i` day columns: one row per (Page, Date).
    tmp = pd.melt(train[list(day_columns[-i:])+['Page']],
                  id_vars='Page', var_name='Date', value_name=val)
    tmp['Date'] = tmp['Date'].astype('datetime64[ns]')
    tmp['Weekend'] = tmp['Date'].dt.dayofweek >= 5
    # Aggregate only the value column. A bare .median() also covers the
    # datetime 'Date' column; if that column survives into the result, a
    # key-less merge would join on it as well and no rows would match.
    tmp1 = tmp.groupby(['Page', 'Weekend'])[val].median().reset_index()
    # Explicit keys: each test row gets the median for its page and for its
    # weekday/weekend class.
    test = test.merge(tmp1, how='left', on=['Page', 'Weekend'])
print("\n")

print("Getting median of medians...")
# Row-wise median across the per-window medians. "MW7" is listed twice, so it
# counts twice among the 11 values -- presumably intentional (the window
# lengths look like 7 x 1, 1, 2, 3, 5, 8, ...); confirm before "fixing" it.
median_columns = [
    "MW7", "MW7", "MW14", "MW21", "MW35", "MW56",
    "MW91", "MW147", "MW238", "MW385", "MW623",
]
test['Predict'] = test[median_columns].median(axis=1)
#print("Result: ", smape(test['Predict'].values, test['Visits'].values))

In [None]:
# Wide frame of predictions: one row per Page, reordered to follow
# full["Page"], and one column per Date. It is built before the file is opened
# so that a failure in pivot/.loc cannot leave an empty pickle on disk.
predict_df = test.pivot(
    index='Page', columns='Date', values='Predict').loc[full["Page"]]
with open("../intermediate/{}/pred_fib.pkl".format(args.seed), "wb") as f:
    # Only the raw ndarray is stored; the Page and Date labels are not saved.
    pickle.dump(predict_df.values, f)