In [1]:
import numpy as np
import pandas as pd

import forecast_tools as ft
from forecast_tools.baseline import Naive1, SNaive, baseline_estimators
from forecast_tools.datasets import load_emergency_dept

from forecast_tools.metrics import mean_absolute_error

from forecast_tools.model_selection import (cross_validation_score, 
                                            cross_validation_folds,
                                            rolling_forecast_origin, 
                                            sliding_window)

from forecast_tools.metrics import (_forecast_error_functions, 
                                    mean_absolute_scaled_error)
                                    


In [2]:
# Load the example dataset exposed by forecast_tools.datasets.  It is the
# series passed as y_train to auto_naive in the cells below.
# NOTE(review): presumably a pandas time series of emergency department
# attendances -- confirm the returned type and frequency.
ed = load_emergency_dept()

In [3]:
def mase_cross_validation_score(model, cv, metric, seasonal_period=None):
    '''
    Cross validation scores for a metric that is scaled by training data
    (e.g. MASE).

    Each fold produced by cross_validation_folds(model, cv) is unpacked as
    (y_train, y_true, y_pred) and scored with
    metric(y_true, y_pred, y_train, period=seasonal_period).

    Parameters:
    ----------
    model: object
        forecasting model handed to cross_validation_folds

    cv: object
        cross validation splits handed to cross_validation_folds

    metric: callable
        called as metric(y_true, y_pred, y_train, period=...)

    seasonal_period: int, optional (default=None)
        forwarded to the metric as its `period` keyword argument.

    Returns:
    --------
    numpy.ndarray
        one score per fold, in fold order.
    '''
    return np.array([
        metric(actual, forecast, history, period=seasonal_period)
        for history, actual, forecast in cross_validation_folds(model, cv)
    ])

In [4]:
def _naive_cv_scores(model, y_train, splitter, splitter_kwargs, metric_name,
                     metric_func, seasonal_period):
    '''Return the per-fold scores of one model under one cv splitter.'''
    # The splits are rebuilt for every model (as the original code did);
    # presumably the splitters are generators that are exhausted after a
    # single pass.
    cv = splitter(train=y_train, **splitter_kwargs)

    if metric_name == 'mase':
        # MASE also needs each fold's training data and the seasonal period.
        return mase_cross_validation_score(model, cv, metric_func,
                                           seasonal_period)

    return cross_validation_score(model, cv, metric_func)


def _naive_holdout_score(model, train, test, horizon, metric_name,
                         metric_func, seasonal_period):
    '''Fit one model on `train` and score its forecast against `test`.'''
    model.fit(train)
    y_preds = model.predict(horizon)

    if metric_name == 'mase':
        # Scale by the training split only: scaling with the full series
        # would leak the held-out observations into the score.
        score = metric_func(test, y_preds, train, period=seasonal_period)
    else:
        score = metric_func(test, y_preds)

    # np.mean accepts python floats, numpy scalars and arrays alike.
    return np.mean(score)


def auto_naive(y_train, horizon=1, seasonal_period=1, 
               min_train_size='auto', method='cv', step=1, 
               window_size='auto', metric='mase'):
    '''Automatic selection of the "best" naive benchmark
    
    The selection process uses out of sample statistics.
    
    By default auto_naive uses cross validation to estimate the mean
    point forecast performance of all naive methods.  It selects the method
    with the lowest point forecast metric on average.
    
    If there is limited data for training a basic holdout sample could be
    used.
    
    Parameters:
    ----------
    y_train: array-like
        training data.  typically in a pandas.Series, pandas.DataFrame
        or numpy.ndarray format. 
        
    horizon: int, optional (default=1)
        Forecast horizon. 
        
    seasonal_period: int, optional (default=1)
        Frequency of the data.  E.g. 7 for weekly pattern, 12 for monthly
        365 for daily.
        
    min_train_size: int or str, optional (default='auto')
        The size of the initial training set for rolling forecast origin
        (method 'ro' or 'cv'). 
        If 'auto' then min_train_size is set to len(y_train) // 3.
        Ignored when method='holdout': the training split is always the
        first len(y_train) - horizon observations.
        
    method: str, optional (default='cv')
        out of sample selection method. 
        'ro' - rolling forecast origin
        'sw' - sliding window
        'cv' - scores from both ro and sw
        'holdout' - single train/test split
         Methods 'ro' and 'sw' are similar, however, sw has a fixed
         window_size and drops older data from training.
        
    step: int, optional (default=1)
        The stride/step of the cross-validation. I.e. the number
        of observations to move forward between folds.
        
    window_size: str or int, optional (default='auto')
        The window_size if using sliding window cross validation
        (method 'sw' or 'cv').
        When 'auto' then window_size=len(y_train) // 3
        
    metric: str, optional (default='mase')
        The metric to measure out of sample accuracy.
        Options: mase, mae, mape, smape, mse, rmse, me.
        
    Returns:
    --------
    dict
        'model': the selected baseline estimator
        '<metric>': float, stored under the name passed as `metric`
        (e.g. 'mae')
        
        Contains the model and its mean out of sample score.

    Raises:
    -------
    ValueError
        If `method` or `metric` is not recognised, or if method='holdout'
        and `horizon` does not leave any training data.
    '''
    valid_methods = ['holdout', 'ro', 'sw', 'cv']

    # Work on a copy so that registering 'mase' below cannot modify a dict
    # owned by the metrics module.
    metrics = dict(_forecast_error_functions())
    
    #temp
    metrics['mase'] = mean_absolute_scaled_error
    
    if method not in valid_methods:
        raise ValueError(f"Method must be in {valid_methods}")
        
    if metric not in metrics:
        # report the metric names rather than the function objects
        raise ValueError(f"Please select a metric from {list(metrics.keys())}")
                
    if min_train_size == 'auto':
        min_train_size = len(y_train) // 3
        
    if window_size == 'auto':
        window_size = len(y_train) // 3
        
    metric_func = metrics[metric]
    baselines = baseline_estimators(seasonal_period)
    models = list(baselines.values())

    if method == 'holdout':
        # single train test split
        if not 0 < horizon < len(y_train):
            # a horizon >= len(y_train) would give a zero/negative split
            # index and therefore a silently wrong train/test split.
            raise ValueError("horizon must be > 0 and < len(y_train) "
                             "when method='holdout'")

        split = len(y_train) - horizon
        train, test = y_train[:split], y_train[split:]

        method_score = [_naive_holdout_score(model, train, test, horizon,
                                             metric, metric_func,
                                             seasonal_period)
                        for model in models]
    else:
        ro_kwargs = {'min_train_size': min_train_size,
                     'horizon': horizon,
                     'step': step}
        sw_kwargs = {'window_size': window_size,
                     'horizon': horizon,
                     'step': step}

        method_score = []
        for model in models:
            fold_scores = []

            if method in ('ro', 'cv'):
                fold_scores.append(
                    _naive_cv_scores(model, y_train, rolling_forecast_origin,
                                     ro_kwargs, metric, metric_func,
                                     seasonal_period))

            if method in ('sw', 'cv'):
                fold_scores.append(
                    _naive_cv_scores(model, y_train, sliding_window,
                                     sw_kwargs, metric, metric_func,
                                     seasonal_period))

            # 'cv' pools the folds of both schemes before averaging.
            method_score.append(np.concatenate(fold_scores).mean())
            
    method_score = np.array(method_score)
    best_index = np.argmin(method_score)
    
    best = {'model': models[best_index],
            metric: method_score[best_index]}
    
    return best

In [5]:
# Select a naive benchmark for a 7 step horizon using rolling forecast
# origin cross validation, scored with the 'mae' metric.
best = auto_naive(
    ed,
    seasonal_period=7,
    horizon=7,
    method='ro',
    metric='mae',
)
best

{'model': Average(), 'mae': 19.521472851717423}

In [6]:
# Same selection, but pooling rolling forecast origin and sliding window
# folds (method='cv'), scored with the 'mae' metric.
best = auto_naive(
    ed,
    seasonal_period=7,
    horizon=7,
    method='cv',
    metric='mae',
)
best

{'model': Average(), 'mae': 19.679856211931035}

In [None]:
# Sliding window cross validation over a 56 step horizon, scored with the
# 'mase' metric.
best = auto_naive(
    ed,
    seasonal_period=7,
    horizon=56,
    method='sw',
    metric='mase',
)
best

In [None]:
# Single train/test split: the last 56 observations are held out and the
# candidates are scored with the 'mase' metric.
best = auto_naive(
    ed,
    seasonal_period=7,
    horizon=56,
    method='holdout',
    metric='mase',
)
best

In [None]:
# Bare expression: the cell only echoes the repr of the imported
# mean_absolute_error function.
# NOTE(review): looks like a leftover inspection cell -- confirm whether it
# is still needed.
mean_absolute_error