# Project: Web Traffic Forecasting


In [None]:
import edward as ed
import numpy as np
import tensorflow as tf
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from edward.models import Normal, Laplace, Empirical

In [None]:
%matplotlib inline

## Data

In [None]:
# Load the page-view history and move the target onto a log scale in one step.
df = pd.read_csv("./data/example_wp_peyton_manning.csv").assign(
    y=lambda frame: np.log(frame["y"])
)
df.tail()

## Model

In [None]:
# Preprocess Data
def fourier_series(dates, period, order):
    """Build sine/cosine Fourier features for a series of timestamps.

    Args:
        dates: pandas Series of datetimes.
        period: Length of one seasonal cycle, in days.
        order: Number of harmonics; each contributes one sin and one cos column.

    Returns:
        Array of shape (len(dates), 2 * order) with columns ordered
        sin_1, cos_1, sin_2, cos_2, ...  A zero-column array is returned
        when ``order`` is not positive.
    """
    # pd.Timestamp and the builtin float replace the pd.datetime / np.float
    # aliases, which no longer exist in current pandas / numpy releases.
    epoch = pd.Timestamp(1970, 1, 1)
    # Convert to (fractional) days since the Unix epoch.
    t = np.array((dates - epoch).dt.total_seconds().astype(float)) / (3600 * 24.0)
    if order <= 0:
        # np.column_stack cannot stack an empty list.
        return np.empty((len(t), 0))
    return np.column_stack([
        fun(2.0 * (i + 1) * np.pi * t / period)
        for i in range(order)
        for fun in (np.sin, np.cos)
    ])
    
def seasonal_feature(dates, period, fourier_order, name):
    """Wrap the Fourier features for ``dates`` in a labelled DataFrame.

    Columns are named ``<name>_delim_<n>`` with ``n`` counting from 1.
    """
    matrix = fourier_series(dates, period, fourier_order)
    n_cols = matrix.shape[1]
    labels = ['{}_delim_{}'.format(name, idx) for idx in range(1, n_cols + 1)]
    return pd.DataFrame(matrix, columns=labels)
    
def make_seasonality_features(history, yearly=True, weekly=True, prior_scale=10.0):
    """Build the seasonal regressor matrix and its per-column prior scales.

    A component is included when its flag is True, or when the flag is False
    but the history is long / dense enough for it (``flag or not disabled``).
    Passing True therefore forces the component on even for short frames.

    Args:
        history: DataFrame with a datetime column ``ds``.
        yearly: Force yearly seasonality (period 365.25 days, order 10).
        weekly: Force weekly seasonality (period 7 days, order 3).
        prior_scale: Prior scale recorded for every generated column.

    Returns:
        (features, prior_scales): a DataFrame with one column per Fourier term
        and a list with one scale per column. When no component is enabled a
        single all-zero column named ``zeros`` with scale 1.0 is returned.
    """
    start = history['ds'].min()
    end = history['ds'].max()
    span = end - start
    dt = history['ds'].diff()
    # Smallest non-zero spacing between consecutive rows. A boolean mask
    # replaces Series.nonzero(), which no longer exists in current pandas.
    min_dt = dt[dt != pd.Timedelta(0)].min()

    yearly_disable = span < pd.Timedelta(days=730)
    weekly_disable = ((span < pd.Timedelta(weeks=2)) or
                      (min_dt >= pd.Timedelta(weeks=1)))

    # (enabled?, period in days, fourier order, column-name prefix)
    specs = [
        (yearly or not yearly_disable, 365.25, 10, 'yearly'),
        (weekly or not weekly_disable, 7, 3, 'weekly'),
    ]

    seasonal_features = []
    prior_scales = []
    for enabled, period, fourier_order, name in specs:
        if not enabled:
            continue
        features = seasonal_feature(history['ds'],
                                    period=period,
                                    fourier_order=fourier_order,
                                    name=name)
        seasonal_features.append(features)
        prior_scales.extend([prior_scale] * features.shape[1])

    # TODO: holiday
    if not seasonal_features:
        # Keep the design matrix non-empty so downstream shapes stay valid.
        seasonal_features.append(
            pd.DataFrame({'zeros': np.zeros(history.shape[0])})
        )
        prior_scales.append(1.0)
    return pd.concat(seasonal_features, axis=1), prior_scales


def get_changepoints(history, n_changepoints=25):
    """Place potential trend changepoints evenly over the first 80% of history.

    Args:
        history: DataFrame with a datetime column ``ds`` and a scaled time
            column ``t``.
        n_changepoints: Requested number of changepoints. ``-1`` (or a value
            too large for the history) selects as many as fit; a non-positive
            result means no changepoints.

    Returns:
        (changepoints_t, A): the sorted changepoint locations on the ``t``
        scale, and an indicator matrix of shape (len(history), len(changepoints_t))
        with ``A[r, c] == 1`` where ``history['t'][r] >= changepoints_t[c]``.
        With no changepoints a single dummy changepoint at 0 is used.
    """
    # int() keeps hist_size usable as an index and as np.linspace's `num`;
    # np.floor alone returns a float.
    hist_size = int(np.floor(history.shape[0] * 0.8))
    if n_changepoints == -1 or n_changepoints + 1 > hist_size:
        n_changepoints = hist_size - 1

    # set changepoints in df['ds'] timestamps
    if n_changepoints <= 0:
        changepoints = []  # no changepoints
    else:
        # builtin int replaces the np.int alias, which numpy no longer provides
        cp_indexes = (
            np.linspace(0, hist_size, n_changepoints + 1)
            .round()
            .astype(int)
        )
        # Drop the first grid point (row 0): a changepoint at the start is moot.
        changepoints = history.iloc[cp_indexes]['ds'].tail(-1)

    # set changepoints_t in t index
    if len(changepoints) > 0:
        start = history['ds'].min()
        t_scale = history['ds'].max() - start
        changepoints_t = np.sort(np.array((changepoints - start) / t_scale))
    else:
        changepoints_t = np.array([0])  # dummy changepoint

    # Indicator matrix: column c is 1 from changepoint c onwards.
    t_values = history['t'].values
    A = (t_values[:, None] >= changepoints_t[None, :]).astype(float)

    return changepoints_t, A

In [None]:
def setup_dataframe(df):
    """Clean the raw frame and add a scaled time index and scaled target.

    Rows with a null ``y`` are dropped, ``ds`` is parsed to datetimes, and the
    frame is sorted by ``ds`` with a fresh 0..n-1 index. Two columns are added:
    ``t`` (time rescaled so the first date is 0 and the last is 1) and
    ``y_scaled`` (``y`` divided by its maximum, or by 1 when the maximum is 0).

    Args:
        df: DataFrame with columns ``ds`` and ``y``. It is not modified.

    Returns:
        A new, sorted DataFrame with the extra ``t`` and ``y_scaled`` columns.

    Raises:
        ValueError: if ``y`` contains infinity, ``ds`` contains NaN/NaT, or
            all dates are identical.
    """
    df = df[df['y'].notnull()].copy()
    df['y'] = pd.to_numeric(df['y'])
    if np.isinf(df['y'].values).any():
        raise ValueError("Found infinity in column y")

    df['ds'] = pd.to_datetime(df['ds'])
    if df['ds'].isnull().any():
        raise ValueError("Found NaN in column ds")
    df = df.sort_values('ds')
    df.reset_index(inplace=True, drop=True)

    # Scale ds and y
    t_start = df['ds'].min()
    t_scale = df['ds'].max() - t_start
    # t_scale is a Timedelta; comparing it with the integer 0 is never equal,
    # so compare against a zero Timedelta to make the guard effective.
    if t_scale == pd.Timedelta(0):
        raise ValueError("Timeseries start == end")
    y_scale = df['y'].max()
    if y_scale == 0:
        y_scale = 1
    df['t'] = (df['ds'] - t_start) / t_scale
    df['y_scaled'] = df['y'] / y_scale
    return df

# Preprocess data: clean the raw frame and add the scaled `t` / `y_scaled` columns.
history = setup_dataframe(df)
print("History dataframe: %d\n" % history.shape[0], history.head())
print()

# Add seasonality features
seasonal_features, prior_scales = make_seasonality_features(history)
print("Seasonal_features: %d\n" % len(prior_scales))
print(seasonal_features.columns)
print()

# Add changepoints (-1==auto)
changepoints_t, A = get_changepoints(history, n_changepoints=25)
print("Changepoints: %d" % len(changepoints_t))

changepoint_prior_scale = 0.05
# Everything the model and the inference step read, gathered in one dict.
# `.values` replaces Series.as_matrix(), which no longer exists in current pandas.
data = {
    'T': history.shape[0],  # number of observations
    'K': seasonal_features.shape[1],  # number of seasonal factors
    'S': len(changepoints_t),  # number of change points
    'y': history['y_scaled'].values,  # scaled time series
    't': history['t'].values,  # scaled time index
    'A': A,  # changepoint indicator matrix
    't_change': changepoints_t,  # change point locations on the t scale
    'X': seasonal_features,  # seasonal feature DataFrame
    'sigmas': prior_scales,  # scale on seasonality prior
    'tau': changepoint_prior_scale,  # scale on change point prior
}

In [None]:
# Model
# Builds the probabilistic graph: a piecewise-linear trend plus a linear
# seasonal term, observed with Normal noise. Sizes come from the `data` dict.
T = data['T']
S = data['S']
K = data['K']

# Placeholders for the observed inputs; they are fed when inference is set up.
t = tf.placeholder(tf.float32, shape=(T,), name="t")        # time index
A = tf.placeholder(tf.float32, shape=(T, S), name="A")      # changepoint indicators
t_change = tf.placeholder(tf.float32, shape=(S), name="t_change") # changepoints_t
X = tf.placeholder(tf.float32, shape=(T, K), name="X")      # season vectors
sigmas = tf.placeholder(tf.float32, shape=(K,), name="sigmas")  # scale on seasonality prior
tau = tf.placeholder(tf.float32, shape=(), name="tau")      # scale on changepoints prior

# Priors on the latent variables.
k = Normal(loc=tf.zeros(1), scale=5.0*tf.ones(1))           # initial slope
m = Normal(loc=tf.zeros(1), scale=5.0*tf.ones(1))           # initial intercept
# NOTE(review): this prior is centred on 0, so it can take negative values,
# yet sigma_obs is used as the `scale` of y below — presumably a
# positive-support prior was intended; confirm.
sigma_obs = Normal(loc=tf.zeros(1), scale=0.5*tf.ones(1))   # noise

delta = Laplace(loc=tf.zeros(S), scale=tau*tf.ones(S))      # changepoint rate adjustment
# Intercept adjustment paired with each slope adjustment: -t_change * delta.
gamma = tf.multiply(-t_change, delta, name="gamma")

beta = Normal(loc=tf.zeros(K), scale=sigmas*tf.ones(K))     # seasonal

# Likelihood: mean = (k + A.delta) * t + (m + A.gamma) + X.beta
y = Normal(loc = (k * tf.ones(T) + ed.dot(A, delta)) * t 
           + m * tf.ones(T) + ed.dot(A, gamma)
           + ed.dot(X, beta),
           scale = sigma_obs * tf.ones(T))

## Inference

In [None]:
# Inference
# ITR sizes the first dimension of every Empirical parameter variable created below.
ITR = 5000                       # Number of samples.

# Init k, m
def init_km(df):
    """Return an initial (slope, intercept) from the first and last observations.

    The line passes through the scaled points at the earliest and latest
    ``ds``: ``k`` is the slope between them and ``m`` the implied intercept.

    Args:
        df: DataFrame with columns ``ds``, ``t`` and ``y_scaled``.

    Returns:
        Tuple ``(k, m)``.
    """
    # idxmin/idxmax return index *labels*, so look rows up with .loc;
    # .iloc would only be right when the index happens to be 0..n-1.
    first, last = df['ds'].idxmin(), df['ds'].idxmax()
    t_first, t_last = df['t'].loc[first], df['t'].loc[last]
    y_first, y_last = df['y_scaled'].loc[first], df['y_scaled'].loc[last]
    k = (y_last - y_first) / (t_last - t_first)
    m = y_first - k * t_first
    return (k, m)

# Start the slope / intercept chains from the line through the first and last points.
kinit, minit = init_km(history)
print("Initial slope / intercept: %f, %f" % (kinit, minit))

# One Empirical approximation per latent variable, each holding ITR rows.
qk = Empirical(params=tf.Variable(kinit * tf.ones([ITR, 1])))
qm = Empirical(params=tf.Variable(minit * tf.ones([ITR, 1])))
qsigma_obs = Empirical(params=tf.Variable(tf.ones([ITR, 1])))
qbeta = Empirical(params=tf.Variable(tf.zeros([ITR, K])))
qdelta = Empirical(params=tf.Variable(tf.zeros([ITR, S])))

# Bind latent variables to their approximations and feed the observed data.
# `.values` replaces DataFrame.as_matrix(), which no longer exists in current pandas.
inference = ed.HMC({k: qk, m: qm, sigma_obs: qsigma_obs, beta: qbeta, delta: qdelta},
                   data={y: data['y'],
                         t: data['t'],
                         A: data['A'],
                         t_change: data['t_change'],
                         X: data['X'].values,
                         sigmas: data['sigmas'],
                         tau: data['tau']})
inference.run(step_size=1e-3)


In [None]:
# Sample Posterior
# Draw `nsample` values from each Empirical approximation, keyed by parameter name.
sess = ed.get_session()
nsample = 500
posteriors = (
    ("k", qk),
    ("m", qm),
    ("sigma_obs", qsigma_obs),
    ("beta", qbeta),
    ("delta", qdelta),
)
model_params = {label: sess.run(q.sample(nsample)) for label, q in posteriors}

k_mean = np.nanmean(model_params['k'])
m_mean = np.nanmean(model_params['m'])
print("Posterior Mean: k = %f, m = %f" % (k_mean, m_mean))
print("delta = ", np.nanmean(model_params['delta'], axis=0))

## Visualize

In [None]:
# def visualise(X_data, y_data, k, m, sigma_obs, beta, delta, n_samples=10):
#     k = k.sample(n_samples).eval()
#     m = m.sample(n_samples).eval()
#     sigma_obs = sigma_obs.sample(n_samples).eval()
# #     beta = beta.sample(n_samples).eval()
# #     delta = delta.sample(n_samples).eval()
#     plt.scatter(X_data.iloc[:, 0], y_data)
#     inputs = np.linspace(-1, 1, num=400)
#     for ns in range(n_samples):
#         output = ???
#     plt.plot(inputs, output)
# visualise(data['X'], data['y'],k, m, sigma_obs, beta, delta)
# visualise(data['X'], data['y'],qk, qm, qsigma_obs, qbeta, qdelta)

## Forecast

In [None]:
def piecewise_linear(t, deltas, k, m, changepoint_ts):
    # Intercept changes
    gammas = -changepoint_ts * deltas
    # Get cumulative slope and intercept at each t
    k_t = k * np.ones_like(t)
    m_t = m * np.ones_like(t)
    for s, t_s in enumerate(changepoint_ts):
        indx = t >= t_s
        k_t[indx] += deltas[s]
        m_t[indx] += gammas[s]
    return k_t * t + m_t


def add_group_component(components, name, group):
    """Append a grouped copy of selected component rows under a new name.

    Rows whose ``component`` is in ``group`` are copied, relabelled as
    ``name``, and stacked below the original rows (original index labels are
    kept on both parts).

    Args:
        components: DataFrame with a ``component`` column.
        name: Label for the new grouped component.
        group: Iterable of component names to include in the group.

    Returns:
        A new DataFrame; ``components`` itself is not modified.
    """
    new_comp = components[components['component'].isin(set(group))].copy()
    new_comp['component'] = name
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    return pd.concat([components, new_comp])
    
def predict_seasonal_components(df, params, data, y_scale, interval_width=0.8):
    """Summarise each seasonal component over the posterior samples of beta.

    For every component (one per seasonal prefix, plus the total ``seasonal``)
    the feature columns are multiplied by the sampled coefficients and scaled
    by ``y_scale``; the mean and a central interval across samples are kept.

    Args:
        df: DataFrame with a datetime column ``ds`` to predict for.
        params: Dict whose ``'beta'`` entry is an array of shape
            (n_samples, n_seasonal_features).
        data: Unused; kept so existing callers keep working.
        y_scale: Factor that maps scaled values back to the original scale.
        interval_width: Mass of the central interval, e.g. 0.8 gives the
            10th and 90th percentiles.

    Returns:
        DataFrame with columns ``<component>``, ``<component>_lower`` and
        ``<component>_upper`` for every component, one row per row of ``df``.
    """
    seasonal_features, _ = make_seasonality_features(df, yearly=True, weekly=True)
    lower_p = 100 * (1.0 - interval_width) / 2
    upper_p = 100 * (1.0 + interval_width) / 2

    n_features = seasonal_features.shape[1]
    per_component = pd.DataFrame({
        'col': np.arange(n_features),
        'component': [x.split('_delim_')[0] for x in seasonal_features.columns],
    })
    # Add a total for seasonal: every column also belongs to 'seasonal'.
    total = pd.DataFrame({
        'col': np.arange(n_features),
        'component': 'seasonal',
    })
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    components = pd.concat([per_component, total])

    # `.values` replaces DataFrame.as_matrix(), removed from current pandas.
    X = seasonal_features.values
    # Separate name so the `data` argument is not clobbered.
    summary = {}
    for component, features in components.groupby('component'):
        cols = features.col.tolist()
        comp_beta = params['beta'][:, cols]
        comp_features = X[:, cols]
        # (rows x features) @ (features x samples) -> one column per sample
        comp = np.matmul(comp_features, comp_beta.transpose()) * y_scale
        summary[component] = np.nanmean(comp, axis=1)
        summary[component + '_lower'] = np.nanpercentile(comp, lower_p, axis=1)
        summary[component + '_upper'] = np.nanpercentile(comp, upper_p, axis=1)
    return pd.DataFrame(summary)
    

def predict(df, history, params, data, nsample=500):
    """Forecast ``y`` for the rows of ``df`` from posterior-mean parameters.

    The trend is the piecewise-linear function evaluated with the posterior
    means of k, m and delta; the seasonal part comes from
    ``predict_seasonal_components``. Both are mapped back to the original
    scale with the maximum of ``history['y']``.

    Args:
        df: DataFrame with columns ``ds`` and ``t`` to predict for. It is not
            modified.
        history: Training frame; its ``y`` column supplies the scale.
        params: Dict of posterior samples with keys ``'k'``, ``'m'``,
            ``'delta'`` and ``'beta'``.
        data: Dict holding the changepoint locations under ``'t_change'``.
        nsample: Unused; kept so existing callers keep working.

    Returns:
        A new DataFrame: ``df`` plus ``trend``, the seasonal component columns
        and the forecast ``y``.
    """
    # get posterior predictive mean
    k = np.nanmean(params['k'])
    m = np.nanmean(params['m'])
    deltas = np.nanmean(params['delta'], axis=0)
    print(k, m, deltas)

    # Same scale (and same zero fallback) that setup_dataframe applies.
    y_scale = history['y'].max()
    if y_scale == 0:
        y_scale = 1

    # Work on a copy so the caller's frame does not gain columns.
    df = df.copy()

    # predict trend
    df['trend'] = piecewise_linear(np.array(df['t']), deltas, k, m, data["t_change"]) * y_scale

    # predict seasonal components
    seasonal_components = predict_seasonal_components(df, params, data, y_scale)
    # Rows correspond positionally; share the index so the column-wise concat
    # cannot misalign when df does not carry a 0..n-1 index.
    seasonal_components.index = df.index
    #TODO: intervals = predict_uncertainty(df)

    df = pd.concat([df, seasonal_components], axis=1)
    df['y'] = df['trend'] + df['seasonal']
    return df

def make_future_dataframe(history, periods, freq='D'):
    """Build the frame of dates that follow the end of ``history``.

    Returns a DataFrame with ``periods`` rows (at most): ``ds`` holds the
    dates strictly after the last historical date at frequency ``freq``, and
    ``t`` places them on the history's scaled time axis (first historical
    date = 0, last = 1, so future rows are above 1).
    """
    first_d, last_d = history['ds'].min(), history['ds'].max()

    # Ask for one extra date because the range starts at last_d itself.
    candidates = pd.date_range(start=last_d, periods=periods + 1, freq=freq)
    upcoming = candidates[candidates > last_d][:periods]

    future = pd.DataFrame({"ds": pd.to_datetime(upcoming)})
    future = future.reset_index(drop=True)
    future['t'] = (future['ds'] - first_d) / (last_d - first_d)
    return future

In [None]:
# Forecast 365 periods past the end of the history and plot both series.
future = make_future_dataframe(history, periods=365)
print("History:\n", history.tail())
print()
print("Future:\n",future.head())

forecast = predict(future, history, model_params, data)
# Same axes for both: scaled time against y.
for frame in (history, forecast):
    plt.plot(frame["t"], frame["y"])
forecast.head()

# Facebook Prophet


In [None]:
from fbprophet import Prophet

# Facebook Prophet
# Reload the raw data, log-transform the target, and fit a default Prophet model.
df = pd.read_csv("./data/example_wp_peyton_manning.csv").assign(
    y=lambda frame: np.log(frame["y"])
)
df.head()
m_pp = Prophet()
m_pp.fit(df)

In [None]:
# Build a 60-period future frame with Prophet's helper and predict on it.
future = m_pp.make_future_dataframe(periods=60)
# NOTE(review): bare expressions that are not last in the cell (this one and
# the `.tail()` below) — presumably left over from interactive inspection.
future.tail()
forecast = m_pp.predict(future)
forecast[['ds', 'trend', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
# Trailing semicolon: presumably there to suppress the notebook echo of the return value.
m_pp.plot(forecast);

In [None]:
# Plot the forecast's components using Prophet's built-in helper.
m_pp.plot_components(forecast);