# Project: Web Traffic Forecasting


In [None]:
import edward as ed
import numpy as np
import tensorflow as tf
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import timedelta
from edward.models import Normal, Laplace, Empirical

from model import *

In [None]:
# IPython magic: render matplotlib figures inside the notebook output cells
%matplotlib inline

## Data

In [None]:
# Parameters
FPATH = "./data/Valentine's_Day_en.wikipedia.org_all-access_spider.csv"
# Last day of the training window; rows after it are held out for testing.
# pd.Timestamp is used because the pd.datetime alias is gone from newer pandas.
SDATE = pd.Timestamp("2017-07-10")

# Prepare dataframe: the model is fitted on the log of the raw 'y' column
df = pd.read_csv(FPATH)
df["y"] = np.log(df["y"])
# setup_dataframe comes from the project's `model` module
# (presumably parses/sorts 'ds' -- TODO confirm against model.py)
df = setup_dataframe(df)

# Split data into train and test
history = df[df['ds'] <= SDATE].copy()
future = df[df['ds'] > SDATE].copy()
print("History: %d, Future: %d" % (history.shape[0], future.shape[0]))

plt.plot(history['ds'], history['y'])
plt.xticks(rotation=90)
plt.show()

# Add a scaled t (time index) and y (#views)
t_start = history['ds'].min()
t_end = history['ds'].max()
# Compare the two dates directly: the span is a Timedelta, and testing it
# against the integer 0 does not reliably detect a zero-length series.
if t_end == t_start:
    raise ValueError("Timeseries start == end")
t_scale = t_end - t_start
y_scale = history['y'].max()
if y_scale == 0:
    # avoid dividing by zero below; leaves y unscaled
    y_scale = 1
history['t'] = (history['ds'] - t_start) / t_scale
history['y_scaled'] = history['y'] / y_scale
print("History dataframe: %d\n" % history.shape[0], history.head())

In [None]:
# Extract features
# Dates treated as holidays: US public holidays for 2015-2017 followed by each
# year's Valentine's Day. All of them share the single label 'US public holiday'.
holiday_en_us = ['2015-01-01', '2015-01-19', '2015-05-25', '2015-07-03', '2015-09-07', '2015-11-26', '2015-11-27', '2015-12-25',
                 '2016-01-01', '2016-01-18', '2016-05-30', '2016-07-04', '2016-09-05', '2016-11-11', '2016-11-24', '2016-12-26',
                 '2017-01-01', '2017-01-02', '2017-01-16', '2017-05-29', '2017-07-04', '2017-09-04', '2017-11-10', '2017-11-23',
                 '2017-12-25',
                 '2015-02-14', '2016-02-14', '2017-02-14']
holidays = pd.DataFrame({
  'holiday': 'US public holiday',
  'ds': pd.to_datetime(holiday_en_us),
  'lower_window': -1,
  'upper_window': 0,
  'prior_scale': 50.0
})
# holidays = None

# make_seasonality_features / get_changepoints / get_changepoint_matrix come
# from the project's `model` module.
seasonal_features, prior_scales = make_seasonality_features(history, yearly=True, weekly=True,
                                                            holidays=holidays)
print("Seasonal features:\n")
print(seasonal_features.columns)

K = seasonal_features.shape[1] # number of seasonal factors
changepoints_t = get_changepoints(history, n_changepoints=25)
S = len(changepoints_t) # number of change points
changepoint_prior_scale = 0.05

print("Seasonal_features: %d\n" % K)

if holidays is not None:
    print("Holidays:\n")
    holiday_names = set(holidays['holiday'])  # built once, not once per column
    holiday_ds = []
    for feature in seasonal_features:
        # feature columns are named "<component>_delim_<suffix>"
        if feature.split("_delim_")[0] in holiday_names:
            holiday_ds.extend(seasonal_features[seasonal_features[feature]==1.0].index)
    # NOTE(review): holiday_ds holds index labels of seasonal_features but is used
    # positionally with .iloc -- only equivalent for a 0..n-1 index; confirm.
    print(history.iloc[np.unique(holiday_ds)]["ds"])


print("Changepoints: %d" % S)
# `.values` replaces `.as_matrix()`, which newer pandas no longer provides.
X_train = {
    't': history['t'].values, # day
    'A': get_changepoint_matrix(history, changepoints_t), # split indicator
    'X': seasonal_features, # seasonal vectors
    'sigmas': prior_scales, # scale on seasonality prior
}

Y_train = history['y_scaled'].values

## Model

In [None]:
# Model
# Piecewise-linear trend plus linear seasonal term, observed with Gaussian noise.
# Placeholders are fed at inference/evaluation time; S and K are the number of
# changepoints and of seasonal feature columns computed earlier in the notebook.
t = tf.placeholder(tf.float32, shape=None, name="t")        # time index
A = tf.placeholder(tf.float32, shape=(None, S), name="A")      # changepoint indicators
t_change = tf.placeholder(tf.float32, shape=(S), name="t_change") # changepoints_t
X = tf.placeholder(tf.float32, shape=(None, K), name="X")      # season vectors
sigmas = tf.placeholder(tf.float32, shape=(K,), name="sigmas")  # scale on seasonality prior
tau = tf.placeholder(tf.float32, shape=(), name="tau")      # scale on changepoints prior
       
# Priors on the base trend and on the observation noise
k = Normal(loc=tf.zeros(1), scale=5.0*tf.ones(1))           # initial slope
m = Normal(loc=tf.zeros(1), scale=5.0*tf.ones(1))           # initial intercept
# NOTE(review): this prior is centred at 0 and unconstrained, yet sigma_obs is
# used as the `scale` of y below -- a positive-support prior may be intended; confirm.
sigma_obs = Normal(loc=tf.zeros(1), scale=0.5*tf.ones(1))   # noise

# One Laplace-distributed slope change per changepoint, with scale tau
delta = Laplace(loc=tf.zeros(S), scale=tau*tf.ones(S))      # changepoint rate adjustment
# Per-changepoint intercept change, -t_change * delta (elementwise)
gamma = tf.multiply(-t_change, delta, name="gamma")

# One Normal coefficient per seasonal feature column, with per-column scale
beta = Normal(loc=tf.zeros(K), scale=sigmas*tf.ones(K))     # seasonal

# Trend: (k + A.delta) * t + (m + A.gamma); seasonality: X.beta
trend_loc = (k + ed.dot(A, delta)) * t + (m + ed.dot(A, gamma))
seas_loc = ed.dot(X, beta)
# Likelihood of the scaled observations
y = Normal(loc = trend_loc + seas_loc, scale = sigma_obs)

## Inference

In [None]:
# Inference
# ITR is the leading dimension of every Empirical parameter tensor created
# below, i.e. how many posterior samples are stored per latent variable.
ITR = 5000                       # Number of samples.

# Init k, m
def init_km(df):
    """Return a rough initial (slope, intercept) for the linear trend.

    The line is drawn through the observations with the earliest and the
    latest 'ds' value, in the scaled coordinates ('t', 'y_scaled').

    Args:
        df: DataFrame with columns 'ds', 't' and 'y_scaled'. Any index is
            accepted; it does not have to be 0..n-1.

    Returns:
        Tuple (k, m): slope and intercept of that line.
    """
    # idxmin/idxmax return index *labels*, so the rows must be looked up with
    # .loc; positional .iloc is only correct for a default 0..n-1 index.
    first, last = df['ds'].idxmin(), df['ds'].idxmax()
    T = df.loc[last, 't'] - df.loc[first, 't']
    k = (df.loc[last, 'y_scaled'] - df.loc[first, 'y_scaled']) / T
    m = df.loc[first, 'y_scaled'] - k * df.loc[first, 't']
    return (k, m)

# Start the slope/intercept chains from the line through the first and last
# observation instead of from zero.
kinit, minit = init_km(history)
print("Initial slope / intercept: %f, %f" % (kinit, minit))

# One Empirical approximation per latent variable, each holding ITR samples
qk = Empirical(params=tf.Variable(kinit * tf.ones([ITR, 1])))
qm = Empirical(params=tf.Variable(minit * tf.ones([ITR, 1])))
qsigma_obs = Empirical(params=tf.Variable(tf.ones([ITR, 1])))
qbeta = Empirical(params=tf.Variable(tf.zeros([ITR, K])))
qdelta = Empirical(params=tf.Variable(tf.zeros([ITR, S])))

# Hamiltonian Monte Carlo over all latent variables, conditioned on the
# training data. `.values` replaces `.as_matrix()`, which newer pandas no
# longer provides.
inference = ed.HMC({k: qk, m: qm, sigma_obs: qsigma_obs, beta: qbeta, delta: qdelta},
                   data={y: Y_train,
                         t: X_train['t'],
                         A: X_train['A'],
                         X: X_train['X'].values,
                         sigmas: X_train['sigmas'],
                         t_change: changepoints_t,
                         tau: changepoint_prior_scale})
inference.run(step_size=5e-4)

## Evaluation

In [None]:
# Add scaled t and y
# The held-out rows are scaled with the *training* t_start / t_scale / y_scale
# so they live in the same coordinates the model was fitted in.
future['t'] = (future['ds'] - t_start) / t_scale
future['y_scaled'] = future['y'] / y_scale
print("Future dataframe: %d\n" % future.shape[0], future.head())
print()

# Extract seasonality features
future_seasonal, future_prior_scales = make_seasonality_features(future,
                                                                 yearly=True, weekly=True,
                                                                 holidays=holidays)
# The test features must line up column-for-column with the training features,
# because the same beta vector is applied to both.
assert(future_seasonal.shape[1] == K)
assert(all(future_seasonal.columns == seasonal_features.columns))

# `.values` replaces `.as_matrix()`, which newer pandas no longer provides.
X_test = {
    't': future['t'].values, # day
    'A': get_changepoint_matrix(future, changepoints_t), # split indicator
    'X': future_seasonal, # seasonal vectors
    'sigmas': future_prior_scales, # scale on seasonality prior
}

Y_test = future['y_scaled'].values

# Evaluate test data
# Posterior predictive: the likelihood with every prior swapped for its
# inferred Empirical approximation.
y_post = ed.copy(y, {k: qk, m: qm, sigma_obs: qsigma_obs, beta: qbeta, delta: qdelta})
print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error',  #mean_absolute_percentage_error
                  data={y_post: Y_test,
                        t: X_test['t'],
                        A: X_test['A'],
                        X: X_test['X'].values,
                        sigmas: X_test['sigmas'],
                        t_change: changepoints_t,
                        tau: changepoint_prior_scale}))


In [None]:
# Prediction
sess = ed.get_session()

# Build the feed once; it is identical for every draw.
# `.values` replaces `.as_matrix()`, which newer pandas no longer provides.
test_feed = {t: X_test['t'],
             A: X_test['A'],
             X: X_test['X'].values,
             sigmas: X_test['sigmas'],
             t_change: changepoints_t,
             tau: changepoint_prior_scale}

# Average 500 evaluations of y_post. Each sess.run returns a one-element list,
# so the stacked array carries a singleton axis that the trailing [0] drops.
y_pred = np.array([sess.run([y_post], feed_dict=test_feed)
                   for _ in range(500)]).mean(axis=0)[0]

# Metrics 
def evalute(y_true, y_pred):
    """Print MAPE, SMAPE and MSE of a forecast and return them.

    SMAPE follows the definition used by `smape` in this notebook:
    200 * mean(|y - yhat| / (|y| + |yhat|)), where a term whose denominator
    is zero counts as 0.

    Args:
        y_true: observed values (array-like, e.g. a pandas Series).
        y_pred: predicted values, same length as `y_true`; paired by position.

    Returns:
        Tuple (mape, smape, mse). MAPE and SMAPE are percentages.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    abs_err = np.abs(y_true - y_pred)

    # MAPE is undefined (inf/nan) wherever y_true is 0; left as is on purpose
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    # |y| + |yhat| rather than |y + yhat|: the latter vanishes for
    # opposite-sign pairs and makes the metric blow up.
    denom = np.abs(y_true) + np.abs(y_pred)
    ratio = np.divide(abs_err, denom, out=np.zeros_like(denom), where=denom != 0)
    smape = 200 * np.mean(ratio)

    mse = np.mean(abs_err ** 2)
    print("MAPE = %f" % mape)
    print("SMAPE = %f" % smape)
    print("MSE = %f" % mse)
    return mape, smape, mse
    
# Score the averaged test prediction, then overlay it on the held-out series
# (both in the scaled 'y_scaled' units)
evalute(future['y_scaled'], y_pred)
plt.plot(future['ds'], future['y_scaled'])
plt.plot(future['ds'], y_pred)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Training error
# Build the feed once; it is identical for every draw.
# `.values` replaces `.as_matrix()`, which newer pandas no longer provides.
train_feed = {t: X_train['t'],
              A: X_train['A'],
              X: X_train['X'].values,
              sigmas: X_train['sigmas'],
              t_change: changepoints_t,
              tau: changepoint_prior_scale}

# Average 500 evaluations of y_post on the training inputs. Each sess.run
# returns a one-element list, hence the trailing [0] to drop that axis.
y_train_pred = np.array([sess.run([y_post], feed_dict=train_feed)
                         for _ in range(500)]).mean(axis=0)[0]

# Metrics 
def evalute(y_true, y_pred):
    """Print MAPE, SMAPE and MSE of a forecast and return them.

    SMAPE follows the definition used by `smape` in this notebook:
    200 * mean(|y - yhat| / (|y| + |yhat|)), where a term whose denominator
    is zero counts as 0.

    Args:
        y_true: observed values (array-like, e.g. a pandas Series).
        y_pred: predicted values, same length as `y_true`; paired by position.

    Returns:
        Tuple (mape, smape, mse). MAPE and SMAPE are percentages.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    abs_err = np.abs(y_true - y_pred)

    # MAPE is undefined (inf/nan) wherever y_true is 0; left as is on purpose
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    # |y| + |yhat| rather than |y + yhat|: the latter vanishes for
    # opposite-sign pairs and makes the metric blow up.
    denom = np.abs(y_true) + np.abs(y_pred)
    ratio = np.divide(abs_err, denom, out=np.zeros_like(denom), where=denom != 0)
    smape = 200 * np.mean(ratio)

    mse = np.mean(abs_err ** 2)
    print("MAPE = %f" % mape)
    print("SMAPE = %f" % smape)
    print("MSE = %f" % mse)
    return mape, smape, mse
    
# Score the averaged in-sample prediction, then overlay it on the training
# series (both in the scaled 'y_scaled' units)
evalute(history['y_scaled'], y_train_pred)
plt.plot(history['ds'], history['y_scaled'])
plt.plot(history['ds'], y_train_pred)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Posterior check
# Summary statistics of the inferred slope, intercept and noise scale.
# NOTE(review): these use qk.mean()/stddev(), presumably over all stored
# samples, whereas the histograms below skip the first `nburn` -- confirm
# whether burn-in should be excluded here as well.
kmean, kstddev = sess.run([qk.mean(), qk.stddev()])
print("Inferred posterior k: mean = %f, stddev = %f" % (kmean, kstddev))
mmean, mstddev = sess.run([qm.mean(), qm.stddev()])
print("Inferred posterior m: mean = %f, stddev = %f" % (mmean, mstddev))

noise_mean, noise_stddev = sess.run([qsigma_obs.mean(), qsigma_obs.stddev()])
print("Inferred posterior noise: mean = %f, stddev = %f" % (noise_mean, noise_stddev))

# Histograms of the k and m samples: drop the first `nburn` samples and keep
# every `stride`-th one after that
nburn = 500
stride = 10
sns.distplot(qk.params.eval()[nburn:ITR:stride])
plt.show()
sns.distplot(qm.params.eval()[nburn:ITR:stride])
plt.show()

## Visualize 

In [None]:
# Prediction
sess = ed.get_session()

# One feed shared by every run in this cell.
# `.values` replaces `.as_matrix()`, which newer pandas no longer provides.
viz_feed = {t: X_test['t'],
            A: X_test['A'],
            X: X_test['X'].values,
            sigmas: X_test['sigmas'],
            t_change: changepoints_t,
            tau: changepoint_prior_scale}

# TODO: mean?
# NOTE(review): y_post.mean() is evaluated in a single sess.run, so it
# presumably reflects one posterior draw of the latents rather than an
# average over draws -- confirm.
y_pred = sess.run([y_post.mean()], feed_dict=viz_feed)[0]
plt.plot(future['ds'], future['y_scaled'])
plt.plot(future['ds'], y_pred)
plt.show()

# Trend = k + ed.dot(A, delta)) * t + (m + ed.dot(A, gamma)
# Same decomposition as the model, with priors swapped for their posteriors
trend_post = ed.copy(trend_loc, {k: qk, m: qm, delta: qdelta})
seas_post = ed.copy(seas_loc, {beta: qbeta})
trend_pred, seas_pred = sess.run([trend_post, seas_post], feed_dict=viz_feed)

# Plot trend
plt.plot(future['ds'], future['y_scaled'])
plt.plot(future['ds'], trend_pred)
plt.show()

# Plot seasonal
plt.plot(future['ds'], future['y_scaled'])
plt.plot(future['ds'], seas_pred)
plt.show()

# Plot trend + seasonal (no noise)
plt.plot(future['ds'], future['y_scaled'])
plt.plot(future['ds'], trend_pred + seas_pred) # no noise
plt.show()

def smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Computed as 200 * mean(|y - yhat| / (|y| + |yhat|)). Terms where both
    values are zero contribute 0 instead of 0/0.

    Args:
        y_true: observed values; any array-like (list, ndarray, Series).
        y_pred: predicted values, same length; paired with `y_true` by position.

    Returns:
        The SMAPE as a float in [0, 200].
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denominator = np.abs(y_true) + np.abs(y_pred)
    abs_err = np.abs(y_true - y_pred)
    # Masked division: entries with a zero denominator keep the 0.0 from `out`,
    # so no divide-by-zero warning is raised and no nan has to be patched.
    diff = np.divide(abs_err, denominator,
                     out=np.zeros_like(denominator), where=denominator != 0)
    return 200 * np.mean(diff)

# Metrics
mape = np.mean(np.abs((future['y_scaled'] - y_pred) / future['y_scaled'])) * 100
# Use the smape() helper; the "SMAPE" line previously printed the MAPE value.
smape_value = smape(future['y_scaled'], y_pred)
mse = ((future['y_scaled'] - y_pred) ** 2).mean()
#mse = tf.reduce_mean(tf.square(y_pred - future['y_scaled']))
print("MAPE = %f" % mape)
print("SMAPE = %f" % smape_value)
print("MSE = %f" % mse)

## Visualize

In [None]:
# def visualise(X_data, y_data, k, m, sigma_obs, beta, delta, n_samples=10):
#     k = k.sample(n_samples).eval()
#     m = m.sample(n_samples).eval()
#     sigma_obs = sigma_obs.sample(n_samples).eval()
# #     beta = beta.sample(n_samples).eval()
# #     delta = delta.sample(n_samples).eval()
#     plt.scatter(X_data.iloc[:, 0], y_data)
#     inputs = np.linspace(-1, 1, num=400)
#     for ns in range(n_samples):
#         output = ???
#     plt.plot(inputs, output)
# visualise(data['X'], data['y'],k, m, sigma_obs, beta, delta)
# visualise(data['X'], data['y'],qk, qm, qsigma_obs, qbeta, qdelta)

## Forecast

In [None]:
def piecewise_linear(t, deltas, k, m, changepoint_ts):
    """Evaluate a piecewise-linear trend at the times `t`.

    Args:
        t: array of time points.
        deltas: array with the slope change applied at each changepoint.
        k: base slope.
        m: base intercept.
        changepoint_ts: array of changepoint times, aligned with `deltas`.

    Returns:
        Array with the trend value at every entry of `t`.
    """
    # Start from the base line everywhere
    slope = k * np.ones_like(t)
    offset = m * np.ones_like(t)
    # Intercept shift paired with each slope change
    offset_shifts = -changepoint_ts * deltas
    for idx, cp in enumerate(changepoint_ts):
        # every time at or past this changepoint picks up its adjustments
        active = t >= cp
        slope[active] += deltas[idx]
        offset[active] += offset_shifts[idx]
    return slope * t + offset


def add_group_component(components, name, group):
    """Append a grouped component made of the rows listed in `group`.

    Args:
        components: DataFrame with a 'component' column.
        name: label given to the new grouped component.
        group: iterable of existing component names to include in the group.

    Returns:
        A new DataFrame: the original rows followed by copies of the rows
        whose 'component' is in `group`, relabelled as `name`. The input
        frame is not modified.
    """
    new_comp = components[components['component'].isin(set(group))].copy()
    new_comp['component'] = name
    # pd.concat instead of DataFrame.append, which newer pandas no longer provides
    return pd.concat([components, new_comp])
    
def predict_seasonal_components(df, params, data, y_scale, interval_width=0.8):
    """Summarise each seasonal component over the posterior samples.

    Args:
        df: DataFrame to build the seasonal features for.
        params: dict whose 'beta' entry is a 2-D array of sampled seasonal
            coefficients, one row per sample and one column per feature.
        data: unused; kept for interface compatibility.
        y_scale: factor that maps the scaled effect back to the units of 'y'.
        interval_width: probability mass of the central interval reported as
            `<component>_lower` / `<component>_upper`. The default 0.8 gives
            the 10th and 90th percentiles.

    Returns:
        DataFrame with, for every component and for the total 'seasonal',
        the columns `<name>`, `<name>_lower` and `<name>_upper` (mean and
        percentiles across samples), one row per row of the feature matrix.
    """
    # NOTE(review): features are built without `holidays`, while beta was fitted
    # with holiday columns; the column indices only line up if the holiday
    # columns come last in the training features -- confirm in model.py.
    seasonal_features, _ = make_seasonality_features(df, yearly=True, weekly=True)
    lower_p = 100 * (1.0 - interval_width) / 2
    upper_p = 100 * (1.0 + interval_width) / 2

    n_features = seasonal_features.shape[1]
    # Map every feature column to its own component and also to the overall
    # 'seasonal' total. pd.concat replaces DataFrame.append, which newer
    # pandas no longer provides.
    components = pd.concat([
        pd.DataFrame({
            'col': np.arange(n_features),
            'component': [x.split('_delim_')[0] for x in seasonal_features.columns],
        }),
        pd.DataFrame({
            'col': np.arange(n_features),
            'component': 'seasonal',
        }),
    ])

    # `.values` replaces `.as_matrix()`, which newer pandas no longer provides
    X = seasonal_features.values
    # separate name so the `data` argument is no longer shadowed
    summary = {}
    for component, features in components.groupby('component'):
        cols = features.col.tolist()
        comp_beta = params['beta'][:, cols]
        comp_features = X[:, cols]
        # (rows x features) . (features x samples) -> effect per row and sample
        comp = (np.matmul(comp_features, comp_beta.transpose())
                * y_scale)
        summary[component] = np.nanmean(comp, axis=1)
        summary[component + '_lower'] = np.nanpercentile(comp, lower_p, axis=1)
        summary[component + '_upper'] = np.nanpercentile(comp, upper_p, axis=1)
    return pd.DataFrame(summary)
    

def predict(df, history, params, data, nsample=500):
    """Forecast 'y' for `df` from the posterior means of the fitted parameters.

    Args:
        df: DataFrame with a scaled time column 't' (and 'ds', which the
            seasonal features are built from). It is not modified.
        history: training DataFrame; its 'y' maximum is the scale factor.
        params: dict of posterior samples with keys 'k', 'm', 'delta', 'beta'.
        data: dict with key 't_change' holding the changepoint times.
        nsample: currently unused.

    Returns:
        A new DataFrame: a copy of `df` plus 'trend', the seasonal component
        columns, and 'y' = trend + seasonal.
    """
    # get posterior predictive mean
    k = np.nanmean(params['k'])
    m = np.nanmean(params['m'])
    deltas = np.nanmean(params['delta'], axis=0)
    print(k, m, deltas)

    # Same scale as used for training, including the fallback for an all-zero max
    y_scale = history['y'].max()
    if y_scale == 0:
        y_scale = 1

    # work on a copy so the caller's frame does not gain a 'trend' column
    df = df.copy()

    # predict trend
    df['trend'] = piecewise_linear(np.array(df['t']), deltas, k, m, data["t_change"])
    df['trend'] = df['trend'] * y_scale

    # predict seasonal components
    seasonal_components = predict_seasonal_components(df, params, data, y_scale)
    #TODO: intervals = predict_uncertainty(df)

    # NOTE(review): concat aligns on the index; seasonal_components has a
    # default 0..n-1 index, so `df` is expected to have one too -- confirm.
    df = pd.concat([df, seasonal_components], axis=1)
    df['y'] = df['trend'] + df['seasonal']
    return df

def make_future_dataframe(history, periods, freq='D'):
    """Build the frame of dates that follow the end of `history`.

    Args:
        history: DataFrame with a datetime column 'ds'.
        periods: number of future dates to generate.
        freq: pandas frequency string for the spacing of the dates.

    Returns:
        DataFrame with columns 'ds' (dates strictly after the last
        historical date) and 't' (time scaled so that the history spans 0..1).
    """
    first_day = history['ds'].min()
    final_day = history['ds'].max()

    # Generate one extra date, because the range may start on final_day itself,
    # then keep only the dates after it and cap the count at `periods`.
    candidates = pd.date_range(start=final_day, periods=periods + 1, freq=freq)
    upcoming = candidates[candidates > final_day][:periods]

    future = pd.DataFrame({"ds": pd.to_datetime(upcoming)}).reset_index(drop=True)
    # same scaling as the training data: 0 at the first day, 1 at the last
    future['t'] = (future['ds'] - first_day) / (final_day - first_day)
    return future

In [None]:
future = make_future_dataframe(history, 365)
print("History:\n", history.tail())
print()
print("Future:\n",future.head())

# `model_params` and `data` are not defined anywhere else in this notebook.
# predict() reads params['k'], ['m'], ['delta'], ['beta'] and data['t_change'],
# so collect them from the fitted Empirical posteriors (all stored samples,
# burn-in included) and from the changepoints used for training.
# NOTE(review): if the `model` module was meant to supply these, drop this.
model_params = {
    'k': qk.params.eval(),
    'm': qm.params.eval(),
    'delta': qdelta.params.eval(),
    'beta': qbeta.params.eval(),
}
data = {'t_change': np.asarray(changepoints_t)}

forecast = predict(future, history, model_params, data)
# history['y'] and forecast['y'] are both on the log scale set up at load time
plt.plot(history["ds"], history["y"])
plt.plot(forecast["ds"], forecast["y"])
forecast.head()

# Facebook Prophet


In [None]:
from fbprophet import Prophet

# Facebook Prophet
# Baseline fit with the reference Prophet implementation. Note that this cell
# loads the "Selena" page, not the "Valentine's Day" page used for the Edward
# model above.
df = pd.read_csv("./data/Selena_en.wikipedia.org_all-access_spider.csv")
# Fit on the log of 'y', as for the Edward model
# NOTE(review): np.log gives -inf for any zero count -- confirm the data has none.
df["y"] = np.log(df["y"])
df.head()
m_pp = Prophet()
m_pp.fit(df)

In [None]:
# Extend the frame 60 periods past the fitted data and predict over it.
# This rebinds `future` and `forecast`, replacing the frames from the cells above.
future = m_pp.make_future_dataframe(periods=60)
future.tail()
forecast = m_pp.predict(future)
forecast[['ds', 'trend', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
# trailing ';' suppresses the notebook echo of the returned figure
m_pp.plot(forecast);

In [None]:
# Plot Prophet's forecast components; ';' suppresses the notebook echo
m_pp.plot_components(forecast);