# Project: Web Traffic Forecasting


In [None]:
import edward as ed
import numpy as np
import tensorflow as tf
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import timedelta
from edward.models import Normal, Laplace, Empirical

from model import *


In [None]:
%matplotlib inline
import matplotlib
matplotlib.rcParams['figure.figsize'] = (24, 12)
matplotlib.rcParams['lines.linewidth'] = 2
matplotlib.rcParams['xtick.labelsize'] = 18
matplotlib.rcParams['ytick.labelsize'] = 18
matplotlib.rcParams['xtick.color'] = 'w'
matplotlib.rcParams['ytick.color'] = 'w'

## Data

In [None]:
# CSV file handed to get_timeseries() below.
FPATH = "./data/nfl_teams.csv"
# Cut-off date: split_train_test() keeps rows dated on or before it as history
# and later rows as future. Built with pd.to_datetime because the
# `pd.datetime` alias no longer exists in pandas >= 1.0.
SDATE = pd.to_datetime("2017-07-10")

def get_timeseries(path):
    """Read the CSV at *path* and return one DataFrame per page.

    Every CSV row is turned into a two-column frame: "ds" holds the column
    labels after the first column and "views" holds the row's values for
    those columns. The frames are returned in a dict keyed by the row's
    ``Page`` field. The name of each page is printed while loading.
    """
    frame = pd.read_csv(path)
    print("Loading timeseries:")
    by_page = {}
    for _, record in frame.iterrows():
        # Position 0 is dropped from both the labels and the values.
        labels = record.index[1:]
        counts = record.values[1:]
        by_page[record.Page] = pd.DataFrame({"ds": labels, "views": counts})
        print(record.Page)
    return by_page

# Load every page once; later cells look individual pages up by name in this dict.
timeseries = get_timeseries(FPATH)

In [None]:
def split_train_test(df, sdate=pd.to_datetime("2017-07-10")):
    """Split one page's series into history/future frames and add scaled columns.

    Args:
        df: frame with a "views" column; a "y" column (log of views) is
            written into this frame in place before it is handed to
            ``setup_dataframe``.
        sdate: cut-off date. Rows with ``ds <= sdate`` become the history,
            rows with ``ds > sdate`` become the future.

    Returns:
        ``(history, future, y_scale)``. Both frames carry a ``t`` column
        (time rescaled by the history's start and span) and a ``y_scaled``
        column (``y / y_scale``); ``y_scale`` is the largest history ``y``,
        or 1 when that maximum is 0.

    Raises:
        ValueError: if the history's first and last ``ds`` values are equal.

    Side effects: prints the split sizes and plots both parts.
    """
    # Prepare dataframe
    # NOTE(review): a page with 0 views on some day yields log(0) = -inf here.
    df["y"] = np.log(df["views"].astype(float))
    # setup_dataframe comes from the `model` module; presumably it parses and
    # orders the "ds" column -- TODO confirm.
    df = setup_dataframe(df)

    # Split data into train and test, using the caller's cut-off (the
    # argument used to be ignored in favour of the global SDATE).
    history = df[df['ds'] <= sdate].copy()
    future = df[df['ds'] > sdate].copy()
    print("[+] History: %d, Future: %d" % (history.shape[0], future.shape[0]))

    # Add a scaled t (time index) and y (#views)
    t_start = history['ds'].min()
    t_end = history['ds'].max()
    # Compare the endpoints directly instead of comparing their difference
    # with the integer 0, so the check does not depend on the type of "ds".
    if t_end == t_start:
        raise ValueError("Timeseries start == end")
    t_scale = t_end - t_start
    y_scale = history['y'].max()
    if y_scale == 0:
        y_scale = 1
    # The future frame reuses the history's offset and scales.
    history['t'] = (history['ds'] - t_start) / t_scale
    history['y_scaled'] = history['y'] / y_scale
    future['t'] = (future['ds'] - t_start) / t_scale
    future['y_scaled'] = future['y'] / y_scale

    plt.plot(history['ds'], history['y'])
    plt.plot(future['ds'], future['y'])
    plt.xticks(rotation=90)
    plt.show()
    return (history, future, y_scale)
    

# Cut-off between training history and the evaluation window.
# pd.to_datetime replaces the `pd.datetime` alias, which pandas >= 1.0 no longer provides.
SDATE = pd.to_datetime("2017-07-10")
pages = [
    "Atlanta_Falcons_en.wikipedia.org_mobile-web_all-agents",
    "Dallas_Cowboys_en.wikipedia.org_mobile-web_all-agents",
]
# One entry per page: {"history": ..., "future": ..., "y_scale": ...}.
ts_data = []
#pages = [""]
#pages = list(timeseries.keys())[:-1]
#pages = pages[:-1]
for p in pages:
    print("Preparing timeseries ", p)
    df = timeseries[p]
    history, future, y_scale = split_train_test(df, SDATE)
    ts_data.append({
        "history": history, "future": future, "y_scale": y_scale
    })
    print(future.head())

In [None]:
# Extract features
# Holiday dates grouped by year (2015, 2016, 2017), followed by 14 February
# of each of those years.
holiday_en_us = [
    '2015-01-01', '2015-01-19', '2015-05-25', '2015-07-03',
    '2015-09-07', '2015-11-26', '2015-11-27', '2015-12-25',
    '2016-01-01', '2016-01-18', '2016-05-30', '2016-07-04',
    '2016-09-05', '2016-11-11', '2016-11-24', '2016-12-26',
    '2017-01-01', '2017-01-02', '2017-01-16', '2017-05-29',
    '2017-07-04', '2017-09-04', '2017-11-10', '2017-11-23',
    '2017-12-25',
    '2015-02-14', '2016-02-14', '2017-02-14',
]
# Every date shares the same label, window and prior scale.
holidays = pd.DataFrame(dict(
    holiday='US public holiday',
    ds=pd.to_datetime(holiday_en_us),
    lower_window=-1,
    upper_window=0,
    prior_scale=10.0,
))
# The table is discarded straight away: with holidays set to None,
# extract_features() passes None on and skips its holiday report.
holidays = None

def extract_features(df):
    """Build the model inputs for one prepared frame.

    Args:
        df: a frame produced by split_train_test (it must have a ``t``
            column; the helpers from the `model` module read whatever else
            they need from it).

    Returns:
        dict with keys ``t`` (time index array), ``A`` (changepoint
        indicator matrix), ``X`` (seasonal feature frame), ``sigmas``
        (prior scales for the seasonal features) and ``t_change``
        (changepoint positions).

    Uses the module-level ``holidays`` table and prints feature counts.
    """
    seasonal_features, prior_scales = make_seasonality_features(
        df, yearly=True, weekly=True, holidays=holidays)
    K = seasonal_features.shape[1]  # number of seasonal factors
    print("[+] %d Seasonal features" % K) #, list(seasonal_features.columns))
    if holidays is not None:
        # Build the set of holiday names once instead of once per feature.
        holiday_names = set(holidays['holiday'])
        holiday_ds = {}
        for feature in seasonal_features:
            # Count the rows on which each holiday feature is switched on.
            if feature.split("_delim_")[0] in holiday_names:
                holiday_ds[feature] = seasonal_features[seasonal_features[feature]==1.0].shape[0]
        print("%d Holidays" % len(holiday_ds), holiday_ds)

    changepoints_t = get_changepoints(df, n_changepoints=25)
    S = len(changepoints_t)  # number of change points
    print("[+] %d changepoints" % S)

    return {
        # .values replaces Series.as_matrix(), which pandas >= 1.0 no longer has.
        't': df['t'].values, # time index
        'A': get_changepoint_matrix(df, changepoints_t), # split indicator
        'X': seasonal_features, # seasonal vectors
        'sigmas': prior_scales, # scale on seasonality prior
        't_change': changepoints_t
    }


# Features are computed from the first prepared series only.
ts = ts_data[0]
print("Extracting features")
# History first, then future -- same order as separate calls would run.
train_data, test_data = (
    extract_features(ts[part]) for part in ("history", "future")
)
# Both feature sets must line up: identical seasonal columns and the same
# number of changepoints.
assert all(train_data["X"].columns == test_data["X"].columns)
assert len(train_data["t_change"]) == len(test_data["t_change"])
print(train_data.keys())

## Model

In [None]:
def build_data(name, N_TS, S, K):
    """Create the float32 input placeholders under the name scope *name*.

    ``S`` sizes the changepoint inputs and ``K`` the seasonal inputs;
    ``N_TS`` is accepted but not used. Returns a dict with the keys
    "t", "A", "t_change", "X" and "sigmas".
    """
    def _input(label, shape=None):
        # All inputs share the dtype; only the shape and name vary.
        return tf.placeholder(tf.float32, shape=shape, name=label)

    with tf.name_scope(name):
        return {
            "t": _input("t"),                     # time index
            "A": _input("A", (None, S)),          # changepoint indicators
            "t_change": _input("t_change", S),    # changepoints_t
            "X": _input("X", (None, K)),          # season vectors
            "sigmas": _input("sigmas", (K,)),     # scale on seasonality prior
        }
                                
def get_posts(params, posts, i=None):
    """Pair model variables with their posterior approximations.

    Args:
        params: dict mapping a group key to ``{name: model variable}``.
        posts: dict mapping a group key to ``{name: posterior variable}``;
            every key/name present here must also exist in ``params``.
        i: group key to select, or None (default) for every group.

    Returns:
        dict ``{model variable: posterior variable}``. With ``i`` given it
        holds group ``i`` plus, when ``posts`` has a ``-1`` group, that
        group's entries as well.
    """
    if i is None:
        # Distinct loop names keep the parameter `i` from being shadowed.
        return {
            params[group][name]: q
            for group, group_posts in posts.items()
            for name, q in group_posts.items()
        }

    selected = {params[i][name]: q for name, q in posts[i].items()}
    if -1 in posts:  # common parameters
        selected.update({params[-1][name]: q for name, q in posts[-1].items()})
    return selected
    
def run_inference(name, data, params, posts, ITR=5000):
    """Run HMC for a model, feeding the training features and observed series.

    Args:
        name: TensorFlow name scope to run under.
        data: dict of input placeholders, keyed like ``train_data``.
        params: per-series dicts of model variables; ``params[i]["y"]`` is
            bound to series ``i``'s scaled history.
        posts: per-series dicts of posterior approximations (see get_posts).
        ITR: accepted for call compatibility; not used in this function.

    Reads the module-level ``train_data`` and ``ts_data``.
    """
    # train_data and ts_data are global
    with tf.name_scope(name):
        data_dict = {data[k]: v for k, v in train_data.items()}
        # add y true
        for i, series in enumerate(ts_data):
            # .values replaces Series.as_matrix(), which pandas >= 1.0 no longer has.
            data_dict[params[i]["y"]] = series["history"]["y_scaled"].values
        posts_dict = get_posts(params, posts)
        inference = ed.HMC(posts_dict, data=data_dict)
        inference.run(step_size=5e-4)
    
# Prediction
def evaluate(y_true, y_pred):
    """Print MAPE, SMAPE and MSE of *y_pred* against *y_true*.

    Both arguments are array-likes of equal length that support elementwise
    arithmetic (NumPy arrays or pandas Series). Nothing is returned.

    SMAPE uses ``|y_true| + |y_pred|`` as its denominator. The previous
    ``|y_true + y_pred|`` gave the same value only when both have the same
    sign and inflated the score otherwise.
    """
    error = y_true - y_pred
    mape = np.mean(np.abs(error / y_true)) * 100
    smape = np.mean(np.abs(error) / (np.abs(y_true) + np.abs(y_pred))) * 200
    mse = (error ** 2).mean()
    print("MAPE = %f" % mape)
    print("SMAPE = %f" % smape)
    print("MSE = %f" % mse)
    
def predict(name, data, params, posts):
    """Score and plot posterior-predictive forecasts for every series.

    For each entry of the module-level ``ts_data`` the model's ``y`` is
    copied with the posteriors substituted in, evaluated 500 times on the
    module-level ``test_data`` features, and the averaged result is passed
    to evaluate() and plotted against the scaled future values.
    """
    n_draws = 500
    # test_data and ts_data are global
    with tf.name_scope(name):
        feed = {data[key]: value for key, value in test_data.items()}
        session = ed.get_session()
        for idx, series in enumerate(ts_data):
            substitutions = get_posts(params, posts, i=idx)
            y_post = ed.copy(params[idx]["y"], substitutions)
            # Each run yields a one-element list; average over the runs and
            # then unwrap that single element.
            draws = [session.run([y_post], feed_dict=feed) for _ in range(n_draws)]
            y_pred = np.array(draws).mean(axis=0)[0]

            future = series["future"]
            y_true = future["y_scaled"]
            evaluate(y_true, y_pred)
            plt.plot(future["ds"], y_true)
            plt.plot(future["ds"], y_pred)
            plt.xticks(rotation=90)
            plt.show()
        
#kmean, kstddev = sess.run([model1.posts[i]["k"].mean(), model1.posts[i]["k"].stddev()])
      #      print("Inferred posterior k: mean = %f, stddev = %f" % (kmean, kstddev))

In [None]:
class Model1(object):
    """Trend-plus-seasonality model with a full, separate variable set per series.

    Every series gets its own slope, intercept, noise, changepoint and
    seasonal variables; all series read the same input placeholders.
    """

    def __init__(self, N_TS, S, K):
        """Store the sizes: N_TS series, S changepoints, K seasonal features."""
        self.N_TS = N_TS
        self.S = S
        self.K = K
        self.name = "model1"
        
    def build_model(self):
        """Create the input placeholders and one set of model variables per series.

        Fills ``self.data`` (placeholders from build_data) and
        ``self.params[i]`` (dict of variables for series ``i``).
        """
        with tf.name_scope(self.name):
            self.data = build_data(self.name, self.N_TS, self.S, self.K)
            self.params = {}      
            for i in range(self.N_TS):
                k = Normal(loc=tf.zeros(1), scale=5.0*tf.ones(1))     # initial slope
                m = Normal(loc=tf.zeros(1), scale=5.0*tf.ones(1))     # initial intercept
                # NOTE(review): sigma_obs and tau are Normal variables that are
                # used as scale parameters below, so nothing here keeps them positive.
                sigma_obs = Normal(loc=tf.zeros(1), scale=0.5*tf.ones(1))   # noise
                tau = Normal(loc=tf.ones(1) * 0.05, scale=1.*tf.ones(1))    # changepoint prior scale
                delta = Laplace(loc=tf.zeros(self.S), scale=tau*tf.ones(self.S))    # changepoint rate adjustment
                # Intercept adjustment paired with each slope change.
                gamma = tf.multiply(-self.data["t_change"], delta)
                beta = Normal(loc=tf.zeros(self.K), 
                              scale=self.data["sigmas"]*tf.ones(self.K))      # seasonal
                # Piecewise-linear trend: (slope + changes) * t + (intercept + adjustments).
                trend_loc = (k + ed.dot(self.data["A"], delta)) * self.data["t"] + \
                            (m + ed.dot(self.data["A"], gamma))
                seas_loc = ed.dot(self.data["X"], beta)
                y = Normal(loc = trend_loc + seas_loc, scale = sigma_obs)
                self.params[i] = {
                    "k": k, "m": m,
                    "sigma_obs": sigma_obs,
                    "tau": tau, "delta": delta,
                    "beta": beta,
                    "trend_loc": trend_loc, "seas_loc": seas_loc,
                    "y": y
                }
                             
    def build_posts(self, ITR, ts_data):
        """Create Empirical approximations with ITR rows for each series' variables.

        ``ts_data`` must hold exactly N_TS entries; the slope and intercept
        approximations start from the values init_km returns for each
        series' history. Results are stored in ``self.posts[i]``.
        """
        assert(self.N_TS == len(ts_data))
        self.ITR = ITR
        self.posts = {}
        with tf.name_scope(self.name):
            for i in range(self.N_TS): 
                kinit, minit = init_km(ts_data[i]["history"])
                print("[+] Initial slope / intercept: %f, %f" % (kinit, minit))
                qbeta = Empirical(params=tf.Variable(tf.zeros([ITR, self.K])))
                qk = Empirical(params=tf.Variable(kinit * tf.ones([ITR, 1])))
                qm = Empirical(params=tf.Variable(minit * tf.ones([ITR, 1])))
                qsigma_obs = Empirical(params=tf.Variable(tf.ones([ITR, 1])))
                qdelta = Empirical(params=tf.Variable(tf.zeros([ITR, self.S])))
                qtau = Empirical(params=tf.Variable(0.05 * tf.ones([ITR, 1])))
                self.posts[i] = {
                    "k": qk, "m": qm,
                    "sigma_obs": qsigma_obs, 
                    "delta": qdelta,
                    "tau": qtau,
                    "beta": qbeta
                }

In [None]:
class Model2(object):
    """Trend-plus-seasonality model whose seasonal coefficients are shared.

    A single ``beta`` (stored under key -1) feeds the seasonal term of every
    series; slope, intercept, noise and changepoint variables stay per series.
    """

    def __init__(self, N_TS, S, K):
        """Store the sizes: N_TS series, S changepoints, K seasonal features."""
        self.N_TS = N_TS
        self.S = S
        self.K = K
        self.name = "model2"
        
    def build_model(self):
        """Create the placeholders, the shared ``beta`` and per-series variables.

        Fills ``self.data``, ``self.params[-1]`` (shared variables) and
        ``self.params[i]`` (variables for series ``i``).
        """
        with tf.name_scope(self.name):
            self.data = build_data(self.name, self.N_TS, self.S, self.K)
            self.params = {}      
             
            # Common prior
            beta = Normal(loc=tf.zeros(self.K), scale=self.data["sigmas"]*tf.ones(self.K)) 
            self.params[-1] = {"beta": beta}
            
            for i in range(self.N_TS):
                k = Normal(loc=tf.zeros(1), scale=5.0*tf.ones(1))     # initial slope
                m = Normal(loc=tf.zeros(1), scale=5.0*tf.ones(1))     # initial intercept
                # NOTE(review): sigma_obs and tau are Normal variables that are
                # used as scale parameters below, so nothing here keeps them positive.
                sigma_obs = Normal(loc=tf.zeros(1), scale=0.5*tf.ones(1))   # noise
                tau = Normal(loc=tf.ones(1) * 0.05, scale=1.*tf.ones(1))    # changepoint prior scale
                delta = Laplace(loc=tf.zeros(self.S), scale=tau*tf.ones(self.S))    # changepoint rate adjustment
                # Intercept adjustment paired with each slope change.
                gamma = tf.multiply(-self.data["t_change"], delta)

                trend_loc = (k + ed.dot(self.data["A"], delta)) * self.data["t"] + \
                            (m + ed.dot(self.data["A"], gamma))
                # Every series uses the shared beta defined above the loop.
                seas_loc = ed.dot(self.data["X"], beta)
                y = Normal(loc = trend_loc + seas_loc, scale = sigma_obs)
                self.params[i] = {
                    "k": k, "m": m,  "sigma_obs": sigma_obs,
                    "tau": tau, "delta": delta,
                    "trend_loc": trend_loc, "seas_loc": seas_loc, "y": y
                }
                             
    def build_posts(self, ITR, ts_data):
        """Create Empirical approximations with ITR rows for all variables.

        The shared ``beta`` approximation goes to ``self.posts[-1]`` and the
        per-series ones to ``self.posts[i]``. ``ts_data`` must hold exactly
        N_TS entries; slope and intercept start from init_km's values.
        """
        assert(self.N_TS == len(ts_data))
        self.ITR = ITR
        self.posts = {}
        with tf.name_scope(self.name):
            qbeta = Empirical(params=tf.Variable(tf.zeros([ITR, self.K])))
            self.posts[-1] = {"beta": qbeta }
            for i in range(self.N_TS): 
                kinit, minit = init_km(ts_data[i]["history"])
                print("[+] Initial slope / intercept: %f, %f" % (kinit, minit))
                qk = Empirical(params=tf.Variable(kinit * tf.ones([ITR, 1])))
                qm = Empirical(params=tf.Variable(minit * tf.ones([ITR, 1])))
                qsigma_obs = Empirical(params=tf.Variable(tf.ones([ITR, 1])))
                qdelta = Empirical(params=tf.Variable(tf.zeros([ITR, self.S])))
                qtau = Empirical(params=tf.Variable(0.05 * tf.ones([ITR, 1])))
                self.posts[i] = {
                    "k": qk, "m": qm,
                    "sigma_obs": qsigma_obs, 
                    "delta": qdelta,
                    "tau": qtau,
                }

In [None]:
class Model3(object):
    """Trend-plus-seasonality model with a global ``gbeta`` and per-series ``beta``.

    Slope, intercept, noise, changepoint and seasonal variables are created
    per series; an additional ``gbeta`` variable is stored under key -1.
    """

    def __init__(self, N_TS, S, K):
        """Store the sizes: N_TS series, S changepoints, K seasonal features."""
        self.N_TS = N_TS
        self.S = S
        self.K = K
        # Was "model2" (copied from Model2), which made this model share
        # Model2's name-scope prefix.
        self.name = "model3"

    def build_model(self):
        """Create the placeholders, ``gbeta`` and per-series variables.

        Fills ``self.data``, ``self.params[-1]`` and ``self.params[i]``.
        """
        with tf.name_scope(self.name):
            self.data = build_data(self.name, self.N_TS, self.S, self.K)
            self.params = {}

            # Common prior
            # NOTE(review): gbeta is not referenced by any per-series variable
            # below, so as written it does not tie the series together --
            # confirm whether beta was meant to depend on it.
            gbeta = Normal(loc=tf.zeros(self.K), scale=self.data["sigmas"]*tf.ones(self.K))
            self.params[-1] = {"gbeta": gbeta}

            for i in range(self.N_TS):
                k = Normal(loc=tf.zeros(1), scale=5.0*tf.ones(1))     # initial slope
                m = Normal(loc=tf.zeros(1), scale=5.0*tf.ones(1))     # initial intercept
                # NOTE(review): sigma_obs and tau are Normal variables that are
                # used as scale parameters below, so nothing here keeps them positive.
                sigma_obs = Normal(loc=tf.zeros(1), scale=0.5*tf.ones(1))   # noise
                tau = Normal(loc=tf.ones(1) * 0.05, scale=1.*tf.ones(1))    # changepoint prior scale
                delta = Laplace(loc=tf.zeros(self.S), scale=tau*tf.ones(self.S))    # changepoint rate adjustment
                # Intercept adjustment paired with each slope change.
                gamma = tf.multiply(-self.data["t_change"], delta)
                trend_loc = (k + ed.dot(self.data["A"], delta)) * self.data["t"] + \
                            (m + ed.dot(self.data["A"], gamma))
                beta = Normal(loc=tf.zeros(self.K), scale=self.data["sigmas"]*tf.ones(self.K))
                seas_loc = ed.dot(self.data["X"], beta)
                y = Normal(loc = trend_loc + seas_loc, scale = sigma_obs)
                self.params[i] = {
                    "k": k, "m": m,  "sigma_obs": sigma_obs,
                    "tau": tau, "delta": delta, "beta": beta,
                    "trend_loc": trend_loc, "seas_loc": seas_loc, "y": y
                }

    def build_posts(self, ITR, ts_data):
        """Create Empirical approximations with ITR rows for all variables.

        The ``gbeta`` approximation goes to ``self.posts[-1]`` and the
        per-series ones to ``self.posts[i]``. ``ts_data`` must hold exactly
        N_TS entries; slope and intercept start from init_km's values.
        """
        assert(self.N_TS == len(ts_data))
        self.ITR = ITR
        self.posts = {}
        with tf.name_scope(self.name):
            qgbeta = Empirical(params=tf.Variable(tf.zeros([ITR, self.K])))
            self.posts[-1] = {"gbeta": qgbeta }
            for i in range(self.N_TS):
                kinit, minit = init_km(ts_data[i]["history"])
                print("[+] Initial slope / intercept: %f, %f" % (kinit, minit))
                qk = Empirical(params=tf.Variable(kinit * tf.ones([ITR, 1])))
                qm = Empirical(params=tf.Variable(minit * tf.ones([ITR, 1])))
                qsigma_obs = Empirical(params=tf.Variable(tf.ones([ITR, 1])))
                qdelta = Empirical(params=tf.Variable(tf.zeros([ITR, self.S])))
                qtau = Empirical(params=tf.Variable(0.05 * tf.ones([ITR, 1])))
                qbeta = Empirical(params=tf.Variable(tf.zeros([ITR, self.K])))
                self.posts[i] = {
                    "k": qk, "m": qm,
                    "sigma_obs": qsigma_obs,
                    "delta": qdelta, "beta": qbeta,
                    "tau": qtau,
                }

In [None]:
# Build, fit and score Model1 on the prepared series.
ITR = 5000                       # Number of samples.
model1 = Model1(
    N_TS=len(ts_data),
    S=len(train_data["t_change"]),   # number of change points
    K=train_data["X"].shape[1],      # number of seasonal factors
)
model1.build_model()
model1.build_posts(ITR, ts_data)
run_inference(model1.name, model1.data, model1.params, model1.posts, ITR)
predict(model1.name, model1.data, model1.params, model1.posts)

In [None]:
# Build, fit and score Model2. The variable keeps the name `model1`, so it
# replaces whatever model object that name held before this cell ran.
ITR = 5000                       # Number of samples.
model1 = Model2(
    N_TS=len(ts_data),
    S=len(train_data["t_change"]),   # number of change points
    K=train_data["X"].shape[1],      # number of seasonal factors
)
model1.build_model()
model1.build_posts(ITR, ts_data)
run_inference(model1.name, model1.data, model1.params, model1.posts, ITR)
predict(model1.name, model1.data, model1.params, model1.posts)

In [None]:
# Build, fit and score Model3. The variable keeps the name `model1`, so it
# replaces whatever model object that name held before this cell ran.
ITR = 5000                       # Number of samples.
model1 = Model3(
    N_TS=len(ts_data),
    S=len(train_data["t_change"]),   # number of change points
    K=train_data["X"].shape[1],      # number of seasonal factors
)
model1.build_model()
model1.build_posts(ITR, ts_data)
run_inference(model1.name, model1.data, model1.params, model1.posts, ITR)
predict(model1.name, model1.data, model1.params, model1.posts)

In [None]:
# Benchmark
def median_model(train, size, p=-50):
    """Return a constant forecast: the median of the most recent observations.

    Args:
        train: 1-D array-like of training values; NaNs are ignored.
        size: number of forecast points to return.
        p: window length. Only its magnitude is used, so ``p=-50`` and
            ``p=50`` both mean "the last 50 values"; 0 means the whole series.

    Returns:
        Array of length ``size`` filled with the window median (0 when the
        median itself is NaN).

    Previously the default ``p=-50`` evaluated ``train[50:]``, i.e. it
    dropped the first 50 points instead of keeping the last 50.
    """
    window = abs(p)
    # Slice a plain array so the window is always positional.
    recent = np.asarray(train, dtype=float)[-window:]
    visits = np.nan_to_num(np.nanmedian(recent))
    return np.ones(size) * visits

# Score the constant-median baseline on every series, using the same scaled
# targets and the same evaluate() report as the models above.
for i, ts in enumerate(ts_data):
    print("Median model for %d" % i)
    y_true = ts["future"]["y_scaled"]
    # Forecast horizon equals the number of held-out points.
    y_pred_median = median_model(ts["history"]["y_scaled"], len(y_true))
    evaluate(y_true, y_pred_median)
    print()

In [None]:
# Training error 
# y_train_pred = np.array([sess.run([y_post], 
#                                   feed_dict={t: X_train['t'],
#                                              A: X_train['A'], X: X_train['X'].as_matrix(), 
#                                              sigmas: X_train['sigmas'], t_change: changepoints_t}
#                                                 #tau: changepoint_prior_scale}))
#                                  ) for _ in range(500)]).mean(axis=0)[0]

In [None]:
# Posterior check
# The session and the q* approximations were never bound at notebook level
# (they only exist inside build_posts / predict), so fetch them explicitly.
# This inspects series 0 of whichever model `model1` currently refers to.
sess = ed.get_session()
_series_posts = model1.posts[0]
qk = _series_posts["k"]
qm = _series_posts["m"]
qtau = _series_posts["tau"]
qsigma_obs = _series_posts["sigma_obs"]

kmean, kstddev = sess.run([qk.mean(), qk.stddev()])
print("Inferred posterior k: mean = %f, stddev = %f" % (kmean, kstddev))
mmean, mstddev = sess.run([qm.mean(), qm.stddev()])
print("Inferred posterior m: mean = %f, stddev = %f" % (mmean, mstddev))
tau_mean, tau_stddev = sess.run([qtau.mean(), qtau.stddev()])
print("Inferred posterior tau: mean = %f, stddev = %f" % (tau_mean, tau_stddev))


noise_mean, noise_stddev = sess.run([qsigma_obs.mean(), qsigma_obs.stddev()])
print("Inferred posterior noise: mean = %f, stddev = %f" % (noise_mean, noise_stddev))

# Histograms use every `stride`-th stored sample after the first `nburn`.
nburn = 500
stride = 10
sns.distplot(qk.params.eval()[nburn:ITR:stride])
plt.show()
sns.distplot(qm.params.eval()[nburn:ITR:stride])
plt.show()

sns.distplot(qtau.params.eval()[nburn:ITR:stride])
plt.show()