In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from darts import TimeSeries
# Read the daily order-count export from the sibling csv/ directory and
# summarise its columns, dtypes and null counts.
filename = "Ming25Jun67_total_order.csv"
csv_path = f'../csv/{filename}'
df = pd.read_csv(csv_path)
df.info()

In [None]:
# Quick look at the frame: its dimensions, then the first and last rows.
print(df.shape, df.head(), df.tail(), sep="\n")

In [None]:
# Parse the completion timestamps into datetimes; the column is used as the
# time axis of the series below.
df['order_completed_at'] = pd.to_datetime(df['order_completed_at'])

# Wrap the order counts in a darts TimeSeries declared with daily frequency.
series = TimeSeries.from_dataframe(
    df,
    time_col='order_completed_at',
    value_cols=['unique_order_count'],
    freq="D",
)

### Remove outliers

In [None]:
# Function to replace outliers with the mean of the rest of the values
from scipy.stats import zscore
from darts import TimeSeries

def replace_outliers_with_mean(df, column_name, threshold=3):
    """Return a copy of ``df`` with z-score outliers in one column replaced.

    A value is treated as an outlier when the absolute value of its z-score
    (computed over the non-missing values of the column) exceeds
    ``threshold``. Every outlier is replaced by the mean of the remaining,
    non-outlier values.

    Args:
        df: Input DataFrame. It is NOT modified; a copy is returned instead,
            so re-running the calling cell always starts from the same data.
        column_name: Name of the numeric column to clean.
        threshold: Absolute z-score above which a value counts as an outlier.
            Defaults to 3.

    Returns:
        A new DataFrame. If at least one outlier was replaced, the column is
        float-typed (the replacement mean is generally not an integer);
        otherwise the column keeps its original dtype.
    """
    # Work on a copy so the caller's frame is left untouched.
    cleaned = df.copy()

    values = cleaned[column_name].to_numpy(dtype=float, na_value=np.nan)
    if values.size == 0:
        # Nothing to score; zscore on an empty array is not meaningful.
        return cleaned

    # nan_policy="omit": missing values stay NaN in the output but do not
    # poison the z-scores of the other rows (the default would turn every
    # score into NaN and silently disable outlier detection).
    z_scores = np.asarray(zscore(values, nan_policy="omit"), dtype=float)

    # NaN scores compare as False, so missing rows are never flagged.
    outliers = np.abs(z_scores) > threshold
    if not outliers.any():
        return cleaned

    # Mean of the values that were not flagged (pandas skips NaN here).
    mean_non_outliers = cleaned.loc[~outliers, column_name].mean()

    # Cast explicitly before assigning: writing a float into an integer
    # column relies on implicit upcasting, which pandas has deprecated.
    cleaned[column_name] = cleaned[column_name].astype(float)
    cleaned.loc[outliers, column_name] = mean_non_outliers

    return cleaned

# Replace outliers in the 'unique_order_count' column, then rebuild the
# daily TimeSeries from the cleaned frame.
df_remove_out = replace_outliers_with_mean(df, 'unique_order_count')
series_remove_outlier = TimeSeries.from_dataframe(
    df_remove_out,
    time_col='order_completed_at',
    value_cols=['unique_order_count'],
    freq="D",
)

In [None]:
from darts.dataprocessing.transformers import (Scaler,)

# Fit the darts Scaler on the outlier-free series and rescale it in one step.
# NOTE(review): the scaler is fitted on the whole series here, before the
# train/validation split made further down, so its statistics also cover the
# validation window - confirm this is intended.
scaler = Scaler()
series_rescaled = scaler.fit_transform(series_remove_outlier)

# From this point on `series` refers to the outlier-free, rescaled data.
series = series_rescaled

### Train / validation split

In [None]:
# Date at which the series is divided into training and validation parts.
cut_off = pd.Timestamp("2024-05-01")
training, validation = series.split_before(cut_off)

In [None]:
# Sanity-check the split: convert both parts to DataFrames, show the rows on
# either side of the boundary, then report the size of each part.
training_df = training.pd_dataframe()
validation_df = validation.pd_dataframe()

print(training_df.tail())
print(validation_df.head())

print(f'training set: {training_df.shape}')
print(f'validation set: {validation_df.shape}')

### Visualize data

In [None]:
import matplotlib.dates as mdates

# Create a wide figure (width x height, in inches) and draw both splits on it.
plt.figure(figsize=(20, 10))
training.plot()
validation.plot()

# Grab the axes the two series were drawn on and configure them directly.
ax = plt.gca()

# Date formatting on the x axis: one tick per month, labelled e.g. "Jan 2024".
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.figure.autofmt_xdate()  # rotate the date labels so they do not overlap

# Title and axis labels.
ax.set_title('Total Order By Date from 2022-2024')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Orders (Unit)')

plt.show()

# Model 

### Regression model

##### Linear Regression

In [None]:
from darts.models import (LinearRegressionModel,
                          CatBoostModel, 
                          LightGBMModel, 
                          XGBModel)
from darts.metrics import mape, mae, mse, mase
from math import sqrt

# Candidate regression models to compare.
model_lst = [LinearRegressionModel,
             CatBoostModel,
             LightGBMModel,
             XGBModel]

# Hyper-parameter grids, keyed by the model's position in model_lst.
# Each value is a list of candidates for the grid search to try.
model_param_dict = {
    0: {"lags": list(range(1, 36)),
        "output_chunk_length": [7],  # try different output_chunk_length values here
        "random_state": [42]
        },
    # TODO: grid for CatBoostModel not defined yet.
    1: {

    }
}

# Grid-search the linear regression model. The cells below read
# `best_parameter`, so this search must actually run; it used to be commented
# out, which made a fresh-kernel run fail with a NameError.
# The full series (training + validation) is passed on purpose: `start=cut_off`
# makes the search score 7-step forecasts from the cut-off date onward.
best_parameter = LinearRegressionModel.gridsearch(
    parameters=model_param_dict[0],
    series=series,
    start=cut_off,
    forecast_horizon=7,
    stride=1,
    last_points_only=False,
    metric=mse,
    verbose=-1
)

In [None]:
# Keep the first element of the search result as the model to evaluate,
# then show the complete result for reference.
best_model = best_parameter[0]
print(best_parameter)

In [None]:

# Evaluate the selected model on the validation window only.
# best_model is taken from best_parameter[0]; darts' gridsearch() returns an
# untrained model instance, so it must be fitted on the training split before
# it can forecast.
best_model.fit(training)
prediction = best_model.predict(len(validation))

# NOTE(review): `training`/`validation` come from the rescaled series, so all
# scores below are in scaled units rather than order counts - confirm whether
# scaler.inverse_transform should be applied before scoring.
mape_score = mape(validation, prediction)
print(f'score MAPE: {round(mape_score, 4)}')
# "ACC" is reported as 100 minus the MAPE percentage.
print(f'score ACC: {100-(round(mape_score, 4))}')

mse_score = mse(validation, prediction)
print(f'score MSE: {round(mse_score, 4)}')

# RMSE is derived from the MSE computed above.
print(f'score RMSE: {round(sqrt(mse_score), 4)}')

mae_score = mae(validation, prediction)
print(f'score MAE: {round(mae_score, 4)}')