In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools

from sklearn.metrics import mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [15]:
# Load the airline passenger counts, using the parsed 'Month' column as the index.
df = pd.read_csv(
    'airline_passengers.csv',
    index_col='Month',
    parse_dates=True,
)

In [16]:
# Mark the DatetimeIndex as having month-start ('MS') frequency.
df.index.freq = 'MS'

In [17]:
# Inspect the index to confirm the dates and the frequency that was just set.
df.index

DatetimeIndex(['1949-01-01', '1949-02-01', '1949-03-01', '1949-04-01',
               '1949-05-01', '1949-06-01', '1949-07-01', '1949-08-01',
               '1949-09-01', '1949-10-01',
               ...
               '1960-03-01', '1960-04-01', '1960-05-01', '1960-06-01',
               '1960-07-01', '1960-08-01', '1960-09-01', '1960-10-01',
               '1960-11-01', '1960-12-01'],
              dtype='datetime64[ns]', name='Month', length=144, freq='MS')

In [18]:
# Display the frame; the rendered output elides the middle rows.
df

Unnamed: 0_level_0,Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121
...,...
1960-08-01,606
1960-09-01,508
1960-10-01,461
1960-11-01,390


In [19]:
# (rows, columns) of the loaded frame.
df.shape

(144, 1)

In [25]:
# Forecast horizon: every validation fold predicts h = 12 periods ahead.
h = 12
# Number of walk-forward validation folds.
steps = 10
# Position of the first train/test split, chosen so that `steps` consecutive
# folds of length `h` finish exactly at the end of the history.
N_test_start = len(df) - (h + steps) + 1
N_test_start


123

In [26]:
# Exclusive upper bound for the split positions: the last fold trains on
# everything except the final `h` observations.
N_test_end = 1 + len(df) - h
N_test_end

133

In [28]:
# Candidate values for each model option explored by the grid search.
trend_type_list = ['add', 'mul']
seasonal_type_list = ['add', 'mul']
damped_trend_list = [True, False]
init_method_list = [
    'estimated',
    'heuristic',
    'legacy-heuristic',
]
# The integer 0 requests a log transform (original author's note: the docs
# are wrong on this point).
use_boxcox_list = [True, False, 0]

In [35]:
def walkforward(
    trend_type,
    seasonal_type,
    damped_trend,
    init_method,
    use_boxcox,
    debug=False):
    """Score one ExponentialSmoothing configuration by walk-forward validation.

    For every split position in ``range(N_test_start, N_test_end)`` the model
    is fitted on the first ``split`` rows of the module-level ``df``, asked to
    forecast ``h`` steps, and scored with the mean squared error against the
    ``h`` rows that follow the split.

    Parameters
    ----------
    trend_type : passed to ExponentialSmoothing as ``trend``.
    seasonal_type : passed to ExponentialSmoothing as ``seasonal``.
    damped_trend : passed to ExponentialSmoothing as ``damped_trend``.
    init_method : passed to ExponentialSmoothing as ``initialization_method``.
    use_boxcox : passed to ExponentialSmoothing as ``use_boxcox``.
    debug : when true, print whether any fold reached the last row of ``df``
        and how many folds were evaluated.

    Returns
    -------
    The mean of the per-fold mean squared errors.

    Notes
    -----
    Reads the module-level names ``df``, ``h``, ``N_test_start`` and
    ``N_test_end``.
    """
    fold_scores = []
    reached_end = False

    for split in range(N_test_start, N_test_end):
        # Expanding training window, followed by the next h rows as hold-out.
        history = df.iloc[:split]
        holdout = df.iloc[split:split + h]

        # Sanity flag: does some hold-out window end on the final observation?
        if holdout.index[-1] == df.index[-1]:
            reached_end = True

        model = ExponentialSmoothing(
            history['Passengers'],
            trend=trend_type,
            damped_trend=damped_trend,
            seasonal=seasonal_type,
            seasonal_periods=12,
            initialization_method=init_method,
            use_boxcox=use_boxcox)
        fitted = model.fit()

        # Error of the h-step forecast for this fold.
        predictions = fitted.forecast(h)
        fold_scores.append(mean_squared_error(holdout['Passengers'], predictions))

    if debug:
        print('seen_last:', reached_end)
        print('steps completed:', len(fold_scores))

    return np.mean(fold_scores)

In [38]:
# Try the function once on a single configuration, with debug output enabled.
walkforward(
    trend_type='add',
    seasonal_type='add',
    damped_trend=False,
    init_method='legacy-heuristic',
    use_boxcox=0,
    debug=True,
)

seen_last: True
steps completed: 10


2521.0271721807176

In [39]:
# Option lists for the grid search, ordered to match the positional
# parameters of walkforward (trend, seasonal, damped, init method, Box-Cox).
tuple_of_option_lists = (
    trend_type_list, seasonal_type_list, damped_trend_list,
    init_method_list, use_boxcox_list,
)

In [41]:
# Preview every option combination that the grid search will evaluate.
for x in itertools.product(*tuple_of_option_lists):
    print(x)

('add', 'add', True, 'estimated', True)
('add', 'add', True, 'estimated', False)
('add', 'add', True, 'estimated', 0)
('add', 'add', True, 'heuristic', True)
('add', 'add', True, 'heuristic', False)
('add', 'add', True, 'heuristic', 0)
('add', 'add', True, 'legacy-heuristic', True)
('add', 'add', True, 'legacy-heuristic', False)
('add', 'add', True, 'legacy-heuristic', 0)
('add', 'add', False, 'estimated', True)
('add', 'add', False, 'estimated', False)
('add', 'add', False, 'estimated', 0)
('add', 'add', False, 'heuristic', True)
('add', 'add', False, 'heuristic', False)
('add', 'add', False, 'heuristic', 0)
('add', 'add', False, 'legacy-heuristic', True)
('add', 'add', False, 'legacy-heuristic', False)
('add', 'add', False, 'legacy-heuristic', 0)
('add', 'mul', True, 'estimated', True)
('add', 'mul', True, 'estimated', False)
('add', 'mul', True, 'estimated', 0)
('add', 'mul', True, 'heuristic', True)
('add', 'mul', True, 'heuristic', False)
('add', 'mul', True, 'heuristic', 0)
('add

In [44]:
# Start the best error at infinity; the search should only ever lower it
# as better configurations are found.
best_score, best_options = float('inf'), None

In [45]:
# Reset the running best inside this cell so that re-running it on its own
# always searches from scratch, instead of comparing against a score left
# over from an earlier execution of the notebook.
best_score = float('inf')
best_options = None

for options in itertools.product(*tuple_of_option_lists):
    # Mean error over the walk-forward folds for this option tuple.
    score = walkforward(*options)

    # A NaN score compares False here, so such a configuration is never picked.
    if score < best_score:
        print('Best_score so far:', score)
        best_score = score
        best_options = options

Best_score so far: 412.81726694149745
Best_score so far: 412.7068452245773
Best_score so far: 320.66411466530855


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


Best_score so far: 305.65933493126124


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


Best_score so far: 284.55236957354686


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


Best_score so far: 268.62187164455725


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


In [46]:
# Lowest mean walk-forward error found by the grid search.
print('Best score:', best_score)

Best score: 268.62187164455725


In [47]:
# Winning option tuple, in the order
# (trend, seasonal, damped_trend, initialization_method, use_boxcox).
best_options

('mul', 'mul', True, 'legacy-heuristic', False)