# Forecasting with Sktime

In [1]:
import matplotlib.pyplot as plt
import pandas as pd


from sktime.forecasting.arima import AutoARIMA
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.forecasting.model_selection import temporal_train_test_split

from sktime.performance_metrics.forecasting import mean_absolute_percentage_error

from sktime.transformations.series.boxcox import BoxCoxTransformer
from sktime.transformations.series.detrend import ConditionalDeseasonalizer, Deseasonalizer, Detrender


In [2]:
data = pd.read_feather('../../data/3.3_data_exploded.feather')
data

Unnamed: 0,ADR,ReservationStatus,Stay Date
164,0.00,Check-Out,2015-07-01
163,4.00,Check-Out,2015-07-01
135,55.68,Check-Out,2015-07-01
227,62.00,Check-Out,2015-07-01
154,62.50,Check-Out,2015-07-01
...,...,...,...
409000,153.57,Check-Out,2017-09-11
409699,99.06,Check-Out,2017-09-12
409822,112.80,Check-Out,2017-09-12
409700,99.06,Check-Out,2017-09-13


In [3]:
data['ReservationStatus'].value_counts()

ReservationStatus
Check-Out    255040
Canceled     149740
No-Show        4477
Name: count, dtype: int64

In [4]:
co_filter = data['ReservationStatus'] == 'Check-Out'
data = data[co_filter].drop(columns='ReservationStatus')
data

Unnamed: 0,ADR,Stay Date
164,0.00,2015-07-01
163,4.00,2015-07-01
135,55.68,2015-07-01
227,62.00,2015-07-01
154,62.50,2015-07-01
...,...,...
409000,153.57,2017-09-11
409699,99.06,2017-09-12
409822,112.80,2017-09-12
409700,99.06,2017-09-13


In [5]:
data.dtypes

ADR                 float64
Stay Date    datetime64[ns]
dtype: object

In [6]:
data = (data.sort_values(by = 'Stay Date')
        .set_index('Stay Date'))
data

Unnamed: 0_level_0,ADR
Stay Date,Unnamed: 1_level_1
2015-07-01,0.00
2015-07-01,101.50
2015-07-01,101.50
2015-07-01,101.50
2015-07-01,101.50
...,...
2017-09-11,153.57
2017-09-12,99.06
2017-09-12,112.80
2017-09-13,99.06


In [7]:
y = (data['ADR']
          .resample('ME')
          .median()
          .rolling(window=3)
          .median()
          .dropna()
          .to_frame())
y

Unnamed: 0_level_0,ADR
Stay Date,Unnamed: 1_level_1
2015-09-30,112.0
2015-10-31,90.0
2015-11-30,75.0
2015-12-31,65.0
2016-01-31,65.0
2016-02-29,66.29
2016-03-31,66.4
2016-04-30,72.0
2016-05-31,82.45
2016-06-30,89.25


In [8]:
y_train, y_test = temporal_train_test_split(y, test_size=90)


In [9]:
# Define the pipeline
forecaster = TransformedTargetForecaster(steps=[
    ("boxcox", BoxCoxTransformer()),
    # ("deseasonalizer", Deseasonalizer(sp=12)),
    # ('deseasonalizer', ConditionalDeseasonalizer(sp=3)),
    ('detrender', Detrender()),
    ("forecaster", AutoARIMA())
])

forecaster.fit(y_train)

fh = list(range(1, len(y_test) + 1))  # Forecast horizon
y_pred = forecaster.predict(fh)

BracketError: The algorithm terminated without finding a valid bracket. Consider trying different initial points.

In [40]:
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f'MAPE: {mape * 100:.2f}%')

plt.figure(figsize=(10, 6))
plt.plot(y_train, label='Train')
plt.plot(y_test, label='Test')
plt.plot(y_pred, label='Predictions')
plt.legend()
plt.show()

ValueError: Found input variables with inconsistent numbers of samples: [8, 90]