In [1]:
import pycaret as pc
import pandas as pd

In [22]:
sell_data = pd.read_excel('./HistoryOfSell_Rev02.xlsx', sheet_name='Sell')

sell_data.head()

Unnamed: 0,Date,A1,A2,A3,A4,A5,A6,A7,A8,A9,...,A15,A16,A17,A18,A19,A20,A21,A22,A23,A24
0,2014-03-01,2,2,0,1,1,0,2,0,1,...,0,0,1,0,1,0,1,1,0,0
1,2014-04-01,6,4,0,2,4,0,1,1,0,...,0,1,0,2,6,3,5,5,2,2
2,2014-05-01,9,3,0,3,6,0,0,1,0,...,0,0,2,0,4,1,3,1,0,0
3,2014-06-01,8,3,2,6,6,0,2,2,4,...,0,0,2,0,6,2,6,3,0,0
4,2014-07-01,11,4,0,6,4,0,2,3,3,...,0,0,2,0,9,0,6,5,0,0


In [3]:
sell_data['A2'].describe()

count    111.000000
mean       5.693694
std        3.324271
min        0.000000
25%        3.000000
50%        5.000000
75%        7.000000
max       23.000000
Name: A2, dtype: float64

In [23]:
import plotly.express as px


sell_data['A2_Mean'] = sell_data['A2'].rolling(12).mean()

fig = px.line(sell_data, x='Date', y=['A2', 'A2_Mean'], template='plotly_dark')
fig.show()

# A2

In [24]:
import numpy as np

# extract month and year from dates
sell_data['Month'] = [i.month for i in sell_data['Date']]
sell_data['Year'] = [i.year for i in sell_data['Date']]

# create a sequence of numbers
sell_data['Series'] = np.arange(1,len(sell_data)+1)

# drop unnecessary columns and re-arrange

sell_data.drop(['Date', 'A2_Mean'], axis=1, inplace=True)
sell_data = sell_data[['Series', 'Year', 'Month', 'A2']] 

# check the head of the dataset
sell_data.head()

Unnamed: 0,Series,Year,Month,A2
0,1,2014,3,2
1,2,2014,4,4
2,3,2014,5,3
3,4,2014,6,3
4,5,2014,7,4


In [26]:
# split data into train-test set
train = sell_data[sell_data['Year'] < 2022]
test = sell_data[sell_data['Year'] >= 2022]

# check shape
train.shape, test.shape


((94, 4), (17, 4))

In [27]:
# import the regression module
from pycaret.regression import *

# initialize setup
s = setup(data = train, test_data = test, target = 'A2', fold_strategy = 'timeseries', numeric_features = ['Year', 'Series'], fold = 20, transform_target = True, session_id = 123)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,A2
2,Target type,Regression
3,Original data shape,"(111, 4)"
4,Transformed data shape,"(111, 4)"
5,Transformed train set shape,"(94, 4)"
6,Transformed test set shape,"(17, 4)"
7,Numeric features,2
8,Preprocess,1
9,Imputation type,simple


In [28]:
best = compare_models(sort = 'MSE')

Processing:   0%|          | 0/77 [00:00<?, ?it/s]

In [14]:
prediction_holdout = predict_model(best);

In [15]:
# generate predictions on the original dataset**
predictions = predict_model(best, data=sell_data)

# add a date column in the dataset**
predictions['Date'] = pd.date_range(start='2014-03-01', end = '2023-05-01', freq = 'MS')

# line plot**
fig = px.line(predictions, x='Date', y=["A2", "prediction_label"], template = 'plotly_dark')

# add a vertical rectange for test-set separation**
fig.add_vrect(x0="2022-07-01", x1="2023-06-01", fillcolor="grey", opacity=0.25, line_width=0)

fig.show()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Lasso Least Angle Regression,2.4449,11.1351,3.3369,-0.0168,0.4976,0.5964


In [16]:
predictions

Unnamed: 0,Series,Year,Month,A2,prediction_label,Date
0,1,2014,3,1.238883,5.264852,2014-03-01
1,2,2014,4,1.922244,5.264852,2014-04-01
2,3,2014,5,1.614501,5.264852,2014-05-01
3,4,2014,6,1.614501,5.264852,2014-06-01
4,5,2014,7,1.922244,5.264852,2014-07-01
...,...,...,...,...,...,...
106,107,2023,1,0.747338,5.264852,2023-01-01
107,108,2023,2,2.620700,5.264852,2023-02-01
108,109,2023,3,0.747338,5.264852,2023-03-01
109,110,2023,4,2.184857,5.264852,2023-04-01


In [17]:
final_best = finalize_model(best)

In [18]:
future_dates = pd.date_range(start = '2023-06-01', end = '2024-05-01', freq = 'MS')

future_df = pd.DataFrame()

future_df['Month'] = [i.month for i in future_dates]
future_df['Year'] = [i.year for i in future_dates]    
future_df['Series'] = np.arange(112,(112+len(future_dates)))

future_df.head()

Unnamed: 0,Month,Year,Series
0,6,2023,112
1,7,2023,113
2,8,2023,114
3,9,2023,115
4,10,2023,116


In [19]:
predictions_future = predict_model(final_best, data=future_df)
predictions_future.head()

Unnamed: 0,Month,Year,Series,prediction_label
0,6,2023,112,5.12933
1,7,2023,113,5.12933
2,8,2023,114,5.12933
3,9,2023,115,5.12933
4,10,2023,116,5.12933


In [20]:
concat_df = pd.concat([sell_data, predictions_future], axis=0)
concat_df_i = pd.date_range(start='2014-03-01', end = '2024-05-01', freq = 'MS')
concat_df.set_index(concat_df_i, inplace=True)

fig = px.line(concat_df, x=concat_df.index, y=["A2", "prediction_label"], template = 'plotly_dark')
fig.show()