<a href="https://colab.research.google.com/github/ThierrryScotto/cross-validation/blob/main/cross_validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit

## Rolling Forecast Origin

The following code and output provide a simplified view of how a rolling forecast origin works in practice.

In [None]:
def rolling_forecast_origin(train, min_train_size, horizon):
  '''
  Rolling forecast origin generator.

  Each fold's training split starts at the first observation and is one
  observation longer than the previous fold's. The validation split is the
  `horizon` observations that immediately follow the training split.

  Parameters
  ----------
  train : sliceable sequence
      Observations in time order; must support len() and slicing.
  min_train_size : int
      Length of the training split in the first fold.
  horizon : int
      Length of every validation split.

  Yields
  ------
  (split_train, split_val) : tuple of slices of `train`
      Nothing is yielded when `train` is too short for a single fold.
  '''
  n_folds = len(train) - min_train_size - horizon + 1
  # `origin` is the index of the first validation observation in each fold.
  for origin in range(min_train_size, min_train_size + n_folds):
    yield train[:origin], train[origin:origin + horizon]

In [None]:
# Toy series used throughout the notebook.
full_series = [2502, 2414, 2800, 2143, 2708, 1900, 2333, 2222, 1234, 3456]

# Hold back the last two observations as the test set; the rest is training data.
train, test = full_series[:-2], full_series[-2:]

print("Full training set: {0}".format(train))
print("Full test set: {0}".format(test))

Full training set: [2502, 2414, 2800, 2143, 2708, 1900, 2333, 2222]
Full test set: [1234, 3456]


In [None]:
# Fold generator: first fold trains on 4 observations, each fold validates on 1.
cv_rolling = rolling_forecast_origin(train, min_train_size=4, horizon=1)

# enumerate(start=1) numbers the folds for display, so no manual counter is needed.
for fold, (cv_train, cv_val) in enumerate(cv_rolling, start=1):
  print(f'CV[{fold}]')
  print(f'Train:\t{cv_train}')
  print(f'Val:\t{cv_val}')
  print('----------')

CV[1]
Train:	[2502, 2414, 2800, 2143]
Val:	[2708]
----------
CV[2]
Train:	[2502, 2414, 2800, 2143, 2708]
Val:	[1900]
----------
CV[3]
Train:	[2502, 2414, 2800, 2143, 2708, 1900]
Val:	[2333]
----------
CV[4]
Train:	[2502, 2414, 2800, 2143, 2708, 1900, 2333]
Val:	[2222]
----------


## Sliding Window Cross Validation

In [None]:
def sliding_window(train, window_size, horizon):
  '''
  Sliding window generator.

  Every fold trains on exactly `window_size` consecutive observations and
  validates on the `horizon` observations that follow them. The window
  moves forward by one observation per fold.

  Parameters
  ----------
  train : sliceable sequence
      Observations in time order; must support len() and slicing.
  window_size : int
      Length of every training split.
  horizon : int
      Length of every validation split.

  Yields
  ------
  (split_train, split_val) : tuple of slices of `train`
      Nothing is yielded when `train` is too short for a single fold.
  '''
  n_folds = len(train) - window_size - horizon + 1
  for start in range(n_folds):
    # `cut` is where the training window ends and validation begins.
    cut = start + window_size
    yield train[start:cut], train[cut:cut + horizon]

In [None]:
# Fold generator: every fold trains on a 4-observation window and validates on 1.
cv_sliding = sliding_window(train, window_size=4, horizon=1)

print('full training set {0}\n'.format(train))

# enumerate(start=1) numbers the folds for display, so no manual counter is needed.
for fold, (cv_train, cv_val) in enumerate(cv_sliding, start=1):
  print(f'CV[{fold}]')
  print(f'Train:\t{cv_train}')
  print(f'Val:\t{cv_val}')
  print('----------')

full training set [2502, 2414, 2800, 2143, 2708, 1900, 2333, 2222]

CV[1]
Train:	[2502, 2414, 2800, 2143]
Val:	[2708]
----------
CV[2]
Train:	[2414, 2800, 2143, 2708]
Val:	[1900]
----------
CV[3]
Train:	[2800, 2143, 2708, 1900]
Val:	[2333]
----------
CV[4]
Train:	[2143, 2708, 1900, 2333]
Val:	[2222]
----------


# Cross Validation example

In [None]:
def cross_validation_score(model, train, cv, metric):
  '''
  Calculate cross validation scores.

  For every (training, validation) pair produced by `cv`, the model is
  refit on the training split, asked to predict as many steps as the
  validation split contains, and the prediction is scored with `metric`.

  Parameters
  ----------
  model : object
      Must provide `fit(cv_train)` and `predict(horizon=...)`.
  train : any
      Not used by this function; kept so the signature is unchanged.
      Each fold's training data comes from `cv`.
  cv : iterable of (cv_train, cv_test) pairs
      The folds to evaluate, in order.
  metric : callable
      Called as `metric(y_true=cv_test, y_pred=preds)`.

  Returns
  -------
  Array built with `np.array` holding one score per fold, in fold order.
  It is empty when `cv` yields no folds.
  '''
  cv_scores = []
  for cv_train, cv_test in cv:
    model.fit(cv_train)
    preds = model.predict(horizon=len(cv_test))
    cv_scores.append(metric(y_true=cv_test, y_pred=preds))
  # Return only after the loop so that every fold is scored, not just the first.
  return np.array(cv_scores)

In [None]:
import forecast
from sklearn.metrics import mean_absolute_error

In [None]:
# NOTE(review): the saved output of this cell is an AttributeError, i.e. the
# imported `forecast` module had no `baseline` attribute when this was run.
# TODO confirm which `forecast` package is intended and that it is installed.
forecast.baseline

AttributeError: ignored