In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from darts import TimeSeries

# Load the dataset from CSV (path is relative to the notebook's working directory).
df = pd.read_csv('../csv/Ming1Jul67_total_order.csv')
# Preview the last rows of the loaded frame.
df.tail()

In [None]:
# Preview the first rows of the frame.
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Parse the completion timestamps so the column can serve as the time axis.
df['order_completed_at'] = pd.to_datetime(df['order_completed_at'])

# Wrap the 'unique_order_count' column in a darts TimeSeries at daily frequency.
series = TimeSeries.from_dataframe(
    df,
    time_col='order_completed_at',
    value_cols=['unique_order_count'],
    freq="D",
)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

plt.figure(figsize=(20, 10))  # width x height
series.plot()

# One tick per month, labelled like "Jan 2023"; slant labels so they don't collide.
ax = plt.gca()
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.xaxis.set_major_locator(mdates.MonthLocator())
plt.gcf().autofmt_xdate()

ax.set_title('Total Order By Date from 2022-2024')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Orders (Unit)')

plt.show()

In [None]:
# Function to replace outliers with the mean of the rest of the values
from scipy.stats import zscore
from darts import TimeSeries

def replace_outliers_with_mean(df, column_name, threshold=3.0):
    """Return a copy of ``df`` with outliers in one column replaced by the mean.

    A value counts as an outlier when the absolute value of its z-score
    (computed over the non-missing values of the column) exceeds
    ``threshold``. Every outlier is overwritten with the mean of the
    remaining, non-outlier values. Missing values are left as they are.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified; the work happens on a copy so that
        re-running the calling cell always starts from the same data.
    column_name : str
        Name of the numeric column to clean.
    threshold : float, default 3.0
        Absolute z-score above which a value is treated as an outlier.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` whose ``column_name`` is float-typed and has its
        outliers replaced.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``df``.
    """
    cleaned = df.copy()

    # Cast up front: the replacement mean is generally fractional, and writing
    # a float into an integer column is a deprecated incompatible-dtype set.
    values = cleaned[column_name].astype(float)

    # 'omit' keeps a single missing value from turning every z-score into NaN,
    # which would silently disable outlier detection.
    z_scores = zscore(values.to_numpy(), nan_policy='omit')

    # NaN z-scores compare False here, so missing values are never flagged.
    outliers = np.abs(z_scores) > threshold

    if outliers.any():
        values.loc[outliers] = values.loc[~outliers].mean()

    cleaned[column_name] = values
    return cleaned

# Replace outliers in the 'unique_order_count' column.
# A copy is passed so the raw `df` stays intact and re-running this cell does
# not re-clean data that has already been cleaned.
df_remove_out = replace_outliers_with_mean(df.copy(), 'unique_order_count')
series_remove_outlier = TimeSeries.from_dataframe(
    df_remove_out,
    time_col='order_completed_at',
    value_cols=['unique_order_count'],
    freq="D",
)
# From here on, `series` refers to the outlier-cleaned data.
series = series_remove_outlier

In [None]:
# from darts.dataprocessing.transformers import (Scaler,)

# scaler = Scaler()
# series = series_remove_outlier #!!!
# series_rescaled = scaler.fit_transform(series)

In [None]:
# Split the series at the cut-off date.
# Alternatives tried earlier: '2024-05-01' (1 month) and '2024-04-01' (2 months).
cut_off = pd.Timestamp('2024-05-01')
training, validation = series.split_before(cut_off)

# DataFrame views of both parts, used below for inspection.
training_df = training.pd_dataframe()
validation_df = validation.pd_dataframe()

# Show the rows on either side of the split boundary.
print(training_df.tail())
print(validation_df.head())

In [None]:
# Print (rows, columns) for the training part, then the validation part.
for split_df in (training_df, validation_df):
    print(split_df.shape)

In [None]:
import matplotlib.dates as mdates

plt.figure(figsize=(20, 10))  # figure size (width x height)
for part in (training, validation):
    part.plot()

# Date formatting on the x axis: one tick per month, labelled like "Jan 2023".
ax = plt.gca()
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.xaxis.set_major_locator(mdates.MonthLocator())
plt.gcf().autofmt_xdate()  # rotate the date labels so they do not overlap

# Title and axis names.
ax.set_title('Total Order By Date from 2022-2024')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Orders (Unit)')

plt.show()

In [None]:
# from darts.models import ExponentialSmoothing

# model = ExponentialSmoothing(

#     random_state=42
# )
# model.fit(training)
# forecast = model.predict(len(validation))

In [None]:
from darts.metrics import mape, mae, mse, mase
from math import sqrt
from darts.utils.utils import ModelMode, SeasonalityMode
from darts.models import ExponentialSmoothing

In [None]:
# Search grid: both trend modes x both seasonality modes x seasonal periods
# 2..30, with a single fixed random seed.
parameters = dict(
    trend=[ModelMode.ADDITIVE, ModelMode.MULTIPLICATIVE],
    seasonal=[SeasonalityMode.ADDITIVE, SeasonalityMode.MULTIPLICATIVE],
    seasonal_periods=list(range(2, 31)),
    random_state=[42],
)

# Note: the full series (training + validation) is passed here, not only the
# training set; `start=cut_off` is given alongside it.
best_parameter = ExponentialSmoothing.gridsearch(
    parameters=parameters,
    series=series,
    start=cut_off,
    forecast_horizon=7,
    stride=1,
    last_points_only=False,
    metric=mse,
    # NOTE(review): -1 is truthy in Python; confirm whether progress output is intended.
    verbose=-1,
)

In [None]:
# Hand-picked configuration, fitted on the training split.
# NOTE(review): a later cell rebinds `best_model` to the grid-search result.
manual_config = dict(
    trend=ModelMode.ADDITIVE,
    seasonal=SeasonalityMode.MULTIPLICATIVE,
    seasonal_periods=3,
    random_state=42,
)
best_model = ExponentialSmoothing(**manual_config)
best_model.fit(training)

In [None]:
# Show the full grid-search result (its first element is used as the model in the next cell).
print(best_parameter)

In [None]:
# Use the first element of the grid-search result as the model; this replaces
# any `best_model` bound by an earlier cell.
best_model = best_parameter[0]
# Fit on the training split only, so the validation split stays unseen by this fit.
best_model.fit(training)

In [None]:
# Score the model fitted on the training split against the validation split.
prediction = best_model.predict(len(validation))

mape_score = mape(validation, prediction)
print(f'score MAPE: {round(mape_score, 4)}')
# Round after subtracting: rounding first and then computing 100 - x can leave
# a floating-point tail (e.g. 87.65429999999999) in the printed value.
print(f'score ACC: {round(100 - mape_score, 4)}')

mse_score = mse(validation, prediction)
print(f'score MSE: {round(mse_score, 4)}')
# RMSE is derived from the MSE computed above.
print(f'score RMSE: {round(sqrt(mse_score), 4)}')

mae_score = mae(validation, prediction)
print(f'score MAE: {round(mae_score, 4)}')

In [None]:
from darts.metrics import mape, mae, mse, mase
from math import sqrt

# Backtest settings, collected in one place.
backtest_options = dict(
    start=0.8,               # start point given as a fraction: 0.8 (80%) of the data
    forecast_horizon=7,      # number of future steps to forecast each time
    stride=1,                # how far the test window moves between forecasts
    last_points_only=False,  # whether to keep only the last point of each forecast
    metric=mape,
    reduction=np.mean,       # function used to reduce the results (here: the mean)
    # NOTE(review): -1 is truthy in Python; confirm whether progress output is intended.
    verbose=-1,
)
backtest_errors = best_model.backtest(series, **backtest_options)
print(f'backtest_errors MAPE: {backtest_errors}')

In [None]:
plt.figure(figsize=(20, 10))  # figure size (width x height)

# Draw in the same order as before: prediction, then training, then validation.
labelled_series = (
    (prediction, "prediction"),
    (training, "training"),
    (validation, "validation"),
)
for ts, ts_label in labelled_series:
    ts.plot(label=ts_label)

ax = plt.gca()
ax.legend()
ax.set_title('Train, Validation, and Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Order Unit')
plt.show()

In [None]:
# Prepare to predict the future: refit the model on the full `series`
# (training and validation together).
best_model.fit(series)

In [None]:
# Check the dates: show the last 14 rows of the series as a DataFrame.
series.pd_dataframe().tail(14)

In [None]:
# Forecast 7 steps ahead and flatten the result into a table with the time
# index as a regular column.
# (If the series had been rescaled, scaler.inverse_transform would be applied first.)
forecast = best_model.predict(7).pd_dataframe().reset_index()
forecast.columns = ['Date', 'Total Order']

# Take the last 7 rows once, then print them and write them to CSV.
last_rows = forecast.tail(7)
print(last_rows)

last_rows.to_csv('../forecast/Date_ExponentialSmoothing_1month.csv', index=False)

In [None]:
# Persist the current `best_model` to disk.
best_model.save("../model/Exponential_Smoothing_totalOrder_1Jul67.pkl")

In [None]:
from darts.metrics import mape, mae, mse, mase
from math import sqrt
from darts.models import ExponentialSmoothing

# The same date is used for the split and as the backtest start point.
backtest_start = pd.Timestamp('2024-05-01')
training, validation = series.split_before(backtest_start)  # 1 month

# NOTE(review): this rebinds `best_model` to a previously saved model loaded
# from disk, replacing the one fitted above.
best_model = ExponentialSmoothing.load("../model_old/exponentialSmooth/Exponential_Smoothing_totalOrder_25Jun67.pkl")
backtest_errors = best_model.backtest(
    series,
    start=backtest_start,    # start point given as an explicit timestamp
    forecast_horizon=7,      # number of future steps to forecast each time
    stride=1,                # how far the test window moves between forecasts
    last_points_only=False,  # whether to keep only the last point of each forecast
    metric=mse,
    reduction=np.mean,       # function used to reduce the results (here: the mean)
)
print(f'backtest_errors MSE: {backtest_errors}')