In [None]:
!pip install sktime[all_extras]



In [None]:
# utilities packages
import datetime

# data wrangling and transformation package
import numpy as np
import pandas as pd
import statsmodels.api as sm

# data visualization package
import matplotlib.pyplot as plt
import seaborn as sns

# time series modeling
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf, month_plot
from statsmodels.stats.diagnostic import acorr_ljungbox

# load sktime for time series forecasting
from sktime.utils.plotting import plot_series # from plotting
from sktime.forecasting.naive import NaiveForecaster # for naive forecasting
from sktime.forecasting.arima import AutoARIMA # for automated ARIMA forecasting
from sktime.forecasting.exp_smoothing import ExponentialSmoothing # for exponential smoothing forecasting
from sktime.forecasting.ets import AutoETS # for automated ETS forecasting 
from sktime.forecasting.tbats import TBATS # for TBATS forecasting
from sktime.forecasting.fbprophet import Prophet # for Prophet forecasting
from sktime.forecasting.base import ForecastingHorizon # for creating forecast horizon
from sktime.forecasting.model_selection import (
    temporal_train_test_split, 
    ExpandingWindowSplitter, 
    SlidingWindowSplitter
) # for train-test split and cross validation
from sktime.performance_metrics.forecasting import (
    mean_absolute_error, 
    mean_squared_error, 
    mean_absolute_percentage_error
) # for evaluation metrics
from sktime.forecasting.model_evaluation import evaluate # for evaluating CV results

%matplotlib inline

In [None]:
# Read the Superstore training CSV (line-level order data) straight from GitHub.
# The file is decoded as latin-1.
superstore_train_url = (
    "https://raw.githubusercontent.com/hadimaster65555/dataset_for_teaching/main/dataset/time_series_data/superstore_time_series_dataset/superstore_train.csv"
)
e_com = pd.read_csv(superstore_train_url, encoding="latin-1")

In [None]:
# Parse 'Order Date' as datetime and reset the time-of-day to midnight,
# so every order line is keyed by its calendar date
e_com['Order Date'] = pd.to_datetime(e_com['Order Date']).dt.normalize()

# Discount amount ("Potongan harga"): Sales multiplied by the Discount column
e_com['Potongan harga'] = e_com['Sales'] * e_com['Discount']

# Sales minus the discount amount, rounded to 3 decimals.
# The column is labelled 'Unit Price', but it is derived from the line-level Sales value.
e_com['Unit Price'] = (e_com['Sales'] - e_com['Potongan harga']).round(3)


In [None]:
# Preview the first five order lines, including the two derived columns
e_com.head(n=5)

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit,Potongan harga,Unit Price
0,1,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136,0.0,261.96
1,2,CA-2016-152156,2016-11-08,2016-11-11,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582,0.0,731.94
2,3,CA-2016-138688,2016-06-12,2016-06-16,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714,0.0,14.62
3,4,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031,430.909875,526.668
4,5,US-2015-108966,2015-10-11,2015-10-18,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164,4.4736,17.894


In [None]:
# Collapse the order lines to one row per order date, counting distinct Order IDs.
# NOTE: this rebinds e_com to the aggregated frame; the line-level columns are no longer
# available afterwards, so re-running earlier cells requires reloading the CSV first.
orders_by_date = e_com.groupby('Order Date', as_index=False)
e_com = orders_by_date.agg({"Order ID": 'nunique'})

In [None]:
# Give the per-day order count a descriptive label
# ("Total transaksi" = total transactions)
column_labels = {"Order ID": "Total transaksi"}
com_day = e_com.rename(columns=column_labels)

In [None]:
# Inspect the first five daily totals
com_day.head(n=5)

Unnamed: 0,Order Date,Total transaksi
0,2014-01-03,1
1,2014-01-04,1
2,2014-01-05,1
3,2014-01-06,3
4,2014-01-07,1


In [None]:
# Index the daily counts by order date so the frame behaves as a time series.
# Reassigning (instead of inplace=True) and guarding on the column's presence keeps this
# cell safe to re-run: once 'Order Date' is the index, a second set_index would raise KeyError.
if 'Order Date' in com_day.columns:
    com_day = com_day.set_index('Order Date')
com_day.head()

Unnamed: 0_level_0,Total transaksi
Order Date,Unnamed: 1_level_1
2014-01-03,1
2014-01-04,1
2014-01-05,1
2014-01-06,3
2014-01-07,1


In [None]:
# Summarize the index range, dtype and non-null count of the daily series.
# The recorded output lists 1181 rows between 2014-01-03 and 2017-12-30, i.e. fewer rows
# than calendar days in that span, so some dates are absent from the index.
com_day.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1181 entries, 2014-01-03 to 2017-12-30
Data columns (total 1 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   Total transaksi  1181 non-null   int64
dtypes: int64(1)
memory usage: 18.5 KB


In [None]:
#com_day.asfreq('d').index

In [None]:
# Line plot of the daily transaction counts (trailing ';' hides the return value)
plot_series(com_day);

In [None]:
# Autocorrelation of the raw daily transaction counts, drawn on a 14x5 inch figure
acf_fig = plot_acf(com_day)
acf_fig.set_size_inches((14, 5));

In [None]:
# Partial autocorrelation of the raw daily transaction counts, drawn on a 14x5 inch figure
pacf_fig = plot_pacf(com_day)
pacf_fig.set_size_inches((14, 5));

In [None]:
# Plot the first difference of the series (change from one row to the next).
# The first row of the differenced frame is NaN because it has no predecessor.
daily_change = com_day.diff()
plot_series(daily_change);

In [None]:
# ACF of the first-differenced series; the leading NaN produced by diff() is dropped first
daily_change_clean = com_day.diff().dropna()
acf_diff_fig = plot_acf(daily_change_clean)
acf_diff_fig.set_size_inches((14, 5));

In [None]:
# PACF of the first-differenced series; the leading NaN produced by diff() is dropped first
daily_change_clean = com_day.diff().dropna()
pacf_diff_fig = plot_pacf(daily_change_clean)
pacf_diff_fig.set_size_inches((14, 5));

In [None]:
# Plot the natural log of the daily transaction counts
log_counts = np.log(com_day)
plot_series(log_counts);

In [None]:
# ACF of the log-transformed daily counts, drawn on a 14x5 inch figure
log_counts = np.log(com_day)
acf_log_fig = plot_acf(log_counts)
acf_log_fig.set_size_inches((14, 5));

In [None]:
# PACF of the log-transformed daily counts, drawn on a 14x5 inch figure
log_counts = np.log(com_day)
pacf_log_fig = plot_pacf(log_counts)
pacf_log_fig.set_size_inches((14, 5));

In [None]:
# Hold out the last 36 rows of the series as the test set; all earlier rows form the training set.
# These are rows, not calendar days: the index does not contain every date.
n_test_obs = 36
y_train, y_test = temporal_train_test_split(com_day, test_size=n_test_obs)

In [None]:
# Plot both partitions together to verify where the split falls
split_labels = ["y_train", "y_test"]
plot_series(y_train, y_test, labels=split_labels);

In [None]:
#y_test.asfreq('d').index
#datetime_index = pd.PeriodIndex(y_test, freq='D')

In [None]:
# Timestamps covered by the hold-out set.
# Built from the index: y_test.values holds the transaction counts, not the dates.
datetime_index = pd.DatetimeIndex(y_test.index)

In [None]:
# Re-express the hold-out dates as daily periods
pidx = pd.PeriodIndex(data=y_test.index, freq='D')

In [None]:
# Absolute forecasting horizon: the exact periods of the hold-out set rather than step offsets.
# NOTE(review): the horizon is a PeriodIndex while the training series keeps its DatetimeIndex,
# and that index has missing dates — confirm the forecaster accepts this combination.
fh = ForecastingHorizon(values=pidx, is_relative=False)

In [None]:
# Display the horizon to verify which dates it covers
fh

ForecastingHorizon(['2017-11-24', '2017-11-25', '2017-11-26', '2017-11-27',
             '2017-11-28', '2017-11-29', '2017-11-30', '2017-12-01',
             '2017-12-02', '2017-12-03', '2017-12-04', '2017-12-05',
             '2017-12-06', '2017-12-07', '2017-12-08', '2017-12-09',
             '2017-12-10', '2017-12-11', '2017-12-13', '2017-12-14',
             '2017-12-15', '2017-12-16', '2017-12-17', '2017-12-18',
             '2017-12-19', '2017-12-20', '2017-12-21', '2017-12-22',
             '2017-12-23', '2017-12-24', '2017-12-25', '2017-12-26',
             '2017-12-27', '2017-12-28', '2017-12-29', '2017-12-30'],
            dtype='period[D]', is_relative=False)

In [None]:
# Define the auto-ARIMA forecaster.
# sp is the number of observations per seasonal cycle. The series is daily, so a weekly
# cycle (7) is used; the previous value of 12 is the setting for monthly data.
# NOTE(review): the index skips some dates, so 7 rows only approximate one week — confirm.
arima_forecaster = AutoARIMA(sp=7, suppress_warnings=True, random_state=1000)

In [None]:
# Estimate the model on the training partition only; the hold-out rows stay unseen
arima_forecaster.fit(y=y_train)



In [None]:
# Forecast the hold-out dates described by the horizon defined earlier
y_pred_arima = arima_forecaster.predict(fh)