In [3]:
import itertools
import sys

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error

from statsmodels.tsa.holtwinters import ExponentialSmoothing

sys.path.append("..")
from utils import create_onedrive_directdownload

In [2]:
# Display the matplotlib style names that this environment offers
plt.style.available

['Solarize_Light2',
 '_classic_test_patch',
 '_mpl-gallery',
 '_mpl-gallery-nogrid',
 'bmh',
 'classic',
 'dark_background',
 'fast',
 'fivethirtyeight',
 'ggplot',
 'grayscale',
 'seaborn-v0_8',
 'seaborn-v0_8-bright',
 'seaborn-v0_8-colorblind',
 'seaborn-v0_8-dark',
 'seaborn-v0_8-dark-palette',
 'seaborn-v0_8-darkgrid',
 'seaborn-v0_8-deep',
 'seaborn-v0_8-muted',
 'seaborn-v0_8-notebook',
 'seaborn-v0_8-paper',
 'seaborn-v0_8-pastel',
 'seaborn-v0_8-poster',
 'seaborn-v0_8-talk',
 'seaborn-v0_8-ticks',
 'seaborn-v0_8-white',
 'seaborn-v0_8-whitegrid',
 'tableau-colorblind10']

In [5]:
# Turn the OneDrive share link into a direct-download URL and show it
onedrive_link = "https://1drv.ms/u/s!AiqdXCxPTydhofYbhA1ASJ6RZ3OjTw?e=HpMMRY"
converted_link = create_onedrive_directdownload(onedrive_link)
print(converted_link)

# Read the CSV with the "Month" column parsed as a datetime index
df = pd.read_csv(
    converted_link,
    index_col="Month",
    parse_dates=True,
)

# Tag the index with month-start ("MS") frequency
df.index.freq = "MS"

df.head()

https://api.onedrive.com/v1.0/shares/u!aHR0cHM6Ly8xZHJ2Lm1zL3UvcyFBaXFkWEN4UFR5ZGhvZlliaEExQVNKNlJaM09qVHc_ZT1IcE1NUlk/root/content


Unnamed: 0_level_0,Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121


In [6]:
# Sanity check: (rows, columns) of the loaded frame
print(df.shape)

(144, 1)


In [7]:
# Forecast horizon we care about: 12 steps ahead
h = 12

# Number of walk-forward validation steps
steps = 10

# Row position at which the first training window ends; each later step
# extends the window by one row. Despite the name, this is not a test-set size.
Ntest = len(df) - (h + steps) + 1

In [8]:
# Candidate values for every model option explored by the grid search
trend_type_list = ["add", "mul"]
seasonal_type_list = ["add", "mul"]
damped_trend_list = [True, False]
init_method_list = ["estimated", "heuristic", "legacy-heuristic"]
# The integer 0 is deliberately listed alongside the two booleans;
# presumably it is taken as a Box-Cox lambda of 0 (log transform) - TODO confirm
use_boxcox_list = [True, False, 0]

In [None]:
# NOTE: the statsmodels documentation lists 'log' as an accepted value for
#       use_boxcox, but (at least with the version used here) passing 'log'
#       does not work. The option list above uses 0 instead.

In [9]:
def walk_forward(
    trend_type,
    seasonal_type,
    damped_trend,
    init_method,
    use_boxcox,
    debug=False,
    data=None,
    horizon=None,
    first_train_end=None,
    seasonal_periods=12):
    """Score one Holt-Winters configuration with walk-forward validation.

    For every fold the model is fit on ``data.iloc[:end_of_train]``, asked to
    forecast ``horizon`` steps, and scored with ``mean_squared_error`` against
    the next ``horizon`` rows of the ``"Passengers"`` column. The training
    window grows by one row per fold, from ``first_train_end`` up to
    ``len(data) - horizon`` inclusive.

    Parameters
    ----------
    trend_type, seasonal_type, damped_trend, init_method, use_boxcox
        Forwarded to ``ExponentialSmoothing`` as ``trend``, ``seasonal``,
        ``damped_trend``, ``initialization_method`` and ``use_boxcox``.
    debug : bool, default False
        When true, print whether a fold reached the last row of ``data`` and
        how many folds were run.
    data : optional
        Frame holding a ``"Passengers"`` column. Defaults to the
        notebook-level ``df``.
    horizon : int, optional
        Number of steps to forecast per fold. Defaults to the notebook-level
        ``h``.
    first_train_end : int, optional
        Row position where the first training window ends. Defaults to the
        notebook-level ``Ntest``.
    seasonal_periods : int, default 12
        Forwarded to ``ExponentialSmoothing``.

    Returns
    -------
    The mean (``np.mean``) of the per-fold mean squared errors.

    Raises
    ------
    ValueError
        If ``horizon`` is not positive, or if the requested window leaves no
        fold to evaluate (previously this silently produced NaN).
    """
    # Fall back to the notebook-level globals so existing calls are unchanged
    data = df if data is None else data
    horizon = h if horizon is None else horizon
    first_train_end = Ntest if first_train_end is None else first_train_end

    if horizon < 1:
        raise ValueError(f"horizon must be at least 1, got {horizon}")

    # Last position at which a full `horizon`-long test window still fits
    last_train_end = len(data) - horizon
    if first_train_end < 1 or first_train_end > last_train_end:
        raise ValueError(
            "no walk-forward folds to evaluate: "
            f"first_train_end={first_train_end}, horizon={horizon}, "
            f"len(data)={len(data)}"
        )

    # store errors
    errors = []
    seen_last = False

    for end_of_train in range(first_train_end, last_train_end + 1):
        # Slice the existing frame at the right points instead of building a
        # separate dataset for each fold
        train = data.iloc[:end_of_train]
        test = data.iloc[end_of_train:end_of_train + horizon]

        # Debug aid: confirm that some fold's test window ends on the last row
        if test.index[-1] == data.index[-1]:
            seen_last = True

        hw = ExponentialSmoothing(
            train["Passengers"],
            initialization_method=init_method,
            trend=trend_type,
            damped_trend=damped_trend,
            seasonal=seasonal_type,
            seasonal_periods=seasonal_periods,
            use_boxcox=use_boxcox,
        )
        res_hw = hw.fit()

        # compute error for the forecast horizon
        fcast = res_hw.forecast(horizon)
        errors.append(mean_squared_error(test["Passengers"], fcast))

    if debug:
        print("seen_last:", seen_last)
        # One error is recorded per completed fold
        print("steps completed:", len(errors))

    return np.mean(errors)

In [10]:
# test our function
walk_forward(
    trend_type = "add",
    seasonal_type = "add",
    damped_trend = False,
    init_method = "legacy-heuristic",
    use_boxcox = 0, 
    debug = True
)

seen_last: True
steps completed: 10


2521.0271721807176

In [14]:
# Option lists for the grid search, in the positional order that
# walk_forward takes its arguments
tuple_of_option_lists = (
    trend_type_list, seasonal_type_list, damped_trend_list,
    init_method_list, use_boxcox_list,
)

# Preview every combination the grid search will visit
for x in itertools.product(*tuple_of_option_lists):
    print(x)

('add', 'add', True, 'estimated', True)
('add', 'add', True, 'estimated', False)
('add', 'add', True, 'estimated', 0)
('add', 'add', True, 'heuristic', True)
('add', 'add', True, 'heuristic', False)
('add', 'add', True, 'heuristic', 0)
('add', 'add', True, 'legacy-heuristic', True)
('add', 'add', True, 'legacy-heuristic', False)
('add', 'add', True, 'legacy-heuristic', 0)
('add', 'add', False, 'estimated', True)
('add', 'add', False, 'estimated', False)
('add', 'add', False, 'estimated', 0)
('add', 'add', False, 'heuristic', True)
('add', 'add', False, 'heuristic', False)
('add', 'add', False, 'heuristic', 0)
('add', 'add', False, 'legacy-heuristic', True)
('add', 'add', False, 'legacy-heuristic', False)
('add', 'add', False, 'legacy-heuristic', 0)
('add', 'mul', True, 'estimated', True)
('add', 'mul', True, 'estimated', False)
('add', 'mul', True, 'estimated', 0)
('add', 'mul', True, 'heuristic', True)
('add', 'mul', True, 'heuristic', False)
('add', 'mul', True, 'heuristic', 0)
('add

In [16]:
# Grid search: score every combination and remember the lowest error seen
best_score, best_options = float("inf"), None
for x in itertools.product(*tuple_of_option_lists):
    score = walk_forward(*x)

    # Skip anything that does not strictly beat the incumbent
    # (a NaN score also lands here because the comparison is False)
    if not score < best_score:
        continue

    print("Best score so far:", score)
    best_score, best_options = score, x

Best score so far: 412.81726694149745
Best score so far: 412.7068452245773
Best score so far: 320.66411466530855


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


Best score so far: 305.65933493126124


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


Best score so far: 284.55236957354686


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


Best score so far: 268.62187164455725


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


In [17]:
# Report the winning score, then each option of the winning configuration
print("best score:", best_score)

trend_type, seasonal_type, damped_trend, init_method, use_boxcox = best_options
print(
    f"trend_type {trend_type}",
    f"seasonal_type {seasonal_type}",
    f"damped_trend {damped_trend}",
    f"init_method {init_method}",
    f"use_boxcox {use_boxcox}",
    sep="\n",
)

best score: 268.62187164455725
trend_type mul
seasonal_type mul
damped_trend True
init_method legacy-heuristic
use_boxcox False
