# Quickstart

From: https://github.com/alan-turing-institute/sktime

## 1. Forecasting

In [1]:
from sktime.datasets import load_airline
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.theta import ThetaForecaster
from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

### Data

In [2]:
# Load the airline passengers series used throughout the forecasting section.
y = load_airline()

In [3]:
# Check which container type the loader returned.
type(y)

pandas.core.series.Series

In [4]:
# Number of observations in the series.
y.shape

(144,)

In [5]:
# Preview the series; pandas elides the middle of long outputs.
y

Period
1949-01    112.0
1949-02    118.0
1949-03    132.0
1949-04    129.0
1949-05    121.0
           ...  
1960-08    606.0
1960-09    508.0
1960-10    461.0
1960-11    390.0
1960-12    432.0
Freq: M, Name: Number of airline passengers, Length: 144, dtype: float64

In [6]:
# Hold out part of the series for evaluation. No size arguments are passed,
# so the function's default split proportions apply.
y_train, y_test = temporal_train_test_split(y)

In [7]:
# Report the size of each part of the split, one line per part.
for label, part in (("y_train.shape", y_train), ("y_test.shape", y_test)):
    print(label, part.shape)

y_train.shape (108,)
y_test.shape (36,)


### Model

In [8]:
# Forecast exactly the time points of the held-out test set: the horizon is
# built from the test index and flagged as absolute (is_relative=False).
fh = ForecastingHorizon(y_test.index, is_relative=False)

In [9]:
# sp is the seasonal periodicity; 12 matches the monthly frequency of the data.
forecaster = ThetaForecaster(sp=12)  # monthly seasonal periodicity

In [10]:
# Fit on the training portion only, so the test observations stay unseen.
forecaster.fit(y_train)

ThetaForecaster(sp=12)

In [11]:
# Predict at the horizon's time points, i.e. the index of the test set.
y_pred = forecaster.predict(fh)

In [12]:
# Score the forecasts against the held-out observations; the result is a
# single float.
mean_absolute_percentage_error(y_test, y_pred)

0.08661467619871827

## 2. Time Series Classification

In [13]:
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sktime.datasets import load_arrow_head
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### Data

In [14]:
# Load the ArrowHead data as a (features, labels) pair.
# NOTE: this rebinds `y`, which held the airline series in the forecasting
# section; from here on `y` refers to the class labels.
X, y = load_arrow_head(return_X_y=True)

In [15]:
from IPython.display import display

# Overview of the feature container: type, shape and the first few rows.
print(type(X))
print(X.shape)
display(X.head())
print()

# NOTE: the row index contains duplicate labels (it is NOT a MultiIndex), so
# the label-based lookup `.loc[1, :]` returns every row labelled 1 -- two rows
# here -- instead of a single row.
rows_labelled_1 = X.loc[1, :]
display(rows_labelled_1)

# Pick the second of those rows by position. The result is a one-element
# Series keyed by the column name ("dim_0").
selected_one_cell = rows_labelled_1.iloc[1]
print("selected_one_cell.shape:", selected_one_cell.shape)

# Each cell of the frame holds a whole pandas Series (one time series), so it
# still has to be unwrapped from the one-element row. Use `.iloc[0]` for this:
# a bare `[0]` on a Series indexed by column labels relies on pandas'
# deprecated positional fallback for integer keys.
select_the_data = selected_one_cell.iloc[0]
print("select_the_data.shape:", select_the_data.shape)
print("\nselect_the_data:")
display(select_the_data)

<class 'pandas.core.frame.DataFrame'>
(211, 1)


Unnamed: 0,dim_0
0,0 -1.9630 1 -1.9578 2 -1.9561 3 ...
1,0 -1.7746 1 -1.7740 2 -1.7766 3 ...
2,0 -1.8660 1 -1.8420 2 -1.8350 3 ...
3,0 -2.0738 1 -2.0733 2 -2.0446 3 ...
4,0 -1.7463 1 -1.7413 2 -1.7227 3 ...





Unnamed: 0,dim_0
1,0 -1.7746 1 -1.7740 2 -1.7766 3 ...
1,0 -1.8299 1 -1.8085 2 -1.7958 3 ...


selected_one_cell.shape: (1,)
select_the_data.shape: (251,)

select_the_data:


0     -1.8299
1     -1.8085
2     -1.7958
3     -1.7666
4     -1.7508
        ...  
246   -1.7334
247   -1.7613
248   -1.8034
249   -1.8432
250   -1.8275
Length: 251, dtype: float64

In [16]:
# Overview of the label container: type, shape and the first few entries.
print(type(y))
print(y.shape)
display(y.head())

# NOTE: the index contains duplicate labels (it is not a MultiIndex), so
# `.loc[1]` returns every entry labelled 1 -- two of them here -- rather than
# a single scalar.
display(y.loc[1])

<class 'pandas.core.series.Series'>
(211,)


0    0
1    1
2    2
3    0
4    1
dtype: object

1    1
1    0
dtype: object

In [17]:
# Shuffle-split into train and test sets. The seed is fixed so that the random
# partition is identical on every run (Restart & Run All reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

### Model

In [18]:
# Seed the forest so that fitting with the same training data gives the same
# model on every run.
classifier = TimeSeriesForestClassifier(random_state=42)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
# Accuracy of the predictions on the held-out test set.
accuracy_score(y_test, y_pred)

0.8490566037735849