### Preprocessing and CV Example: ARIMA

In [1]:
# Core imports for the notebook.
import pandas as pd
import os
# Move the working directory up one level before importing tslib.
# NOTE(review): os.path.expanduser only expands a leading "~", so it returns
# ".." unchanged here. The chdir is relative, so re-running this cell climbs
# one more directory each time — restart the kernel before re-running.
os.chdir(os.path.expanduser(".."))
# presumably tslib lives in the parent directory (hence the chdir) — TODO confirm
import tslib

In [2]:
# Load the daily minimum temperature dataset. The relative path is resolved
# against the current working directory.
df = pd.read_csv("./data/daily_min_temp.csv")

In [3]:
# Preview the first rows (columns shown: Date, Temp).
df.head()

Unnamed: 0,Date,Temp
0,1981-01-01,20.7
1,1981-01-02,17.9
2,1981-01-03,18.8
3,1981-01-04,14.6
4,1981-01-05,15.8


In [4]:
from tslib import preprocessing,evaluation

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from tslib import models

In [6]:
# Instantiate tslib's Arima model with no constructor arguments.
m = models.Arima()

In [7]:
# Fit on the Temp column and rebind `m` to whatever fit() returns (later cells
# read `m.m`, so the returned object exposes an `.m` attribute).
# order=(1,1,1), seasonal_order=(1,1,1,12): presumably (p,d,q) and (P,D,Q,s) in
# the statsmodels SARIMAX convention — TODO confirm against tslib.
# NOTE(review): the data is daily (see the Date column), so s=12 would mean a
# 12-observation seasonal cycle — confirm this is intended.
m = m.fit(df.Temp,order=(1,1,1),seasonal_order=(1,1,1,12))

In [8]:
# Get predictions from the inner object held in `m.m`, using its defaults.
# presumably in-sample predictions over the training range — TODO confirm
p = m.m.get_prediction()

In [9]:
# Lower/upper bounds per row at alpha=0.05 (presumably 95% intervals).
# NOTE(review): the first rows of the output have bounds in the thousands,
# presumably an artifact of model initialisation/differencing — confirm and
# consider excluding those rows from any analysis.
p.conf_int(alpha=0.05)

Unnamed: 0,lower Temp,upper Temp
0,-2771.818013,2771.818013
1,-1939.278682,1980.678601
2,-1942.078649,1977.878634
3,-1941.178649,1978.778634
4,-1945.378628,1974.578655
...,...,...
3645,9.476804,19.013534
3646,9.065588,18.602318
3647,8.917373,18.454101
3648,8.892820,18.429533


In [10]:
# Forecast 5 steps from the inner fitted object and show the lower/upper
# bounds at alpha=0.05.
f=m.m.get_forecast(steps=5)
f.conf_int(alpha=0.05)

Unnamed: 0,lower Temp,upper Temp
3650,8.475395,18.01208
3651,8.151921,19.001832
3652,7.952235,19.251943
3653,7.842032,19.358349
3654,7.692557,19.347372


In [11]:
# Configure the cross-validation fold builder.
# presumably: first cut-off at index 2000, cut-off advanced by 180 per fold,
# 365 observations held out per fold — TODO confirm against tslib.preprocessing
proc = preprocessing.CVPreprocessor(start_idx=2000,step_size=180,horizon=365)

In [12]:
# Build the folds from the Temp series; the result is what evaluate() consumes.
folds = proc.fit(df.Temp)

In [13]:
# Create the cross-validation evaluator.
# NOTE(review): this name shadows Python's built-in eval() for the rest of the
# notebook; a name such as `cv` would avoid that (the next cell uses it too).
eval = evaluation.CrossValidation()

In [14]:
# Run the cross-validation over the folds with model `m`.
# The result is indexed by metric name in later cells ('mae', 'mape', 'smape').
# `type='arima'` presumably selects the model-specific code path — TODO confirm
r = eval.evaluate(folds,m,type='arima')

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:28<00:00,  4.14s/it]


In [15]:
# Pull the 'mae' entry out of the cross-validation results.
r1 = r['mae']

In [16]:
# Display the MAE table: one row per horizon with mae, mae_std, mae_min, mae_max.
r1

Unnamed: 0,horizon,mae,mae_std,mae_min,mae_max
0,1,2.444837,1.286512,1.117795,4.654831
1,2,2.328849,1.893628,0.158686,4.430607
2,3,2.457346,2.522685,0.195303,7.334024
3,4,2.930737,2.184576,0.610926,6.185369
4,5,3.022350,1.873007,1.059140,6.788804
...,...,...,...,...,...
360,361,3.199966,1.279577,1.787342,5.879808
361,362,1.636884,2.129410,0.135662,5.885164
362,363,1.546141,1.409853,0.004926,3.725084
363,364,3.186408,1.952048,0.703074,5.875277


In [17]:
# Pull the 'mape' entry out of the results and display it: one row per horizon
# with mape, mape_std, mape_min, mape_max.
r2 = r['mape']
r2

Unnamed: 0,horizon,mape,mape_std,mape_min,mape_max
0,1,0.240041,0.096574,0.096362,0.391162
1,2,0.259026,0.248076,0.018452,0.726329
2,3,0.499803,0.942624,0.025364,2.619294
3,4,0.418752,0.561223,0.076366,1.671721
4,5,0.340809,0.293458,0.111488,0.948034
...,...,...,...,...,...
360,361,0.287287,0.104894,0.158202,0.494102
361,362,0.134648,0.170527,0.022240,0.498743
362,363,0.269802,0.386011,0.000560,1.087304
363,364,0.312870,0.152636,0.106526,0.530622


In [18]:
# Pull the 'smape' entry out of the results and display it: one row per horizon
# with smape, smape_std, smape_min, smape_max.
r3 = r['smape']
r3

Unnamed: 0,horizon,smape,smape_std,smape_min,smape_max
0,1,0.058549,0.031333,0.022983,0.121567
1,2,0.058267,0.050982,0.004656,0.133206
2,3,0.071104,0.097370,0.006422,0.283517
3,4,0.078089,0.070697,0.019849,0.227648
4,5,0.074879,0.046373,0.029518,0.160791
...,...,...,...,...,...
360,361,0.080731,0.040610,0.042948,0.164055
361,362,0.040290,0.057737,0.005622,0.166108
362,363,0.056786,0.067407,0.000140,0.176093
363,364,0.079753,0.037477,0.025285,0.134798


In [19]:
## Pre-post    --> Analysis
## Clumpiness  --> Dynamic Time Warping
## Sparse data --> Forecasting

In [20]:
from tslib.evaluation import ClumpinessEval

In [21]:
# Toy series for the clumpiness check: 14 observations, 4 of them non-zero.
ts = pd.Series(
    data=[0, 0, 0, 14, 0, 0, 16, 0, 0, 18, 0, 0, 0, 16],
)

In [22]:
# Instantiate tslib's clumpiness evaluator with no constructor arguments.
clump = ClumpinessEval()

In [23]:
# Evaluate the toy series; the result is a dict with 'adi', 'cv' and
# 'demand_pattern' keys.
# presumably adi = average inter-demand interval (14 periods / 4 non-zero
# observations = 3.5) — TODO confirm against tslib.evaluation
clump.evaluate(ts)

{'adi': 3.5,
 'cv': 0.10206207261596575,
 'demand_pattern': "intermitent: Use Croston's or TSB method"}