In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.linear_model import BayesianRidge

from darts.models import (
    LinearRegressionModel,
    RegressionModel,
    LightGBMModel,
    XGBModel,
    CatBoostModel,
    RandomForest
)
from darts.metrics import mape
from darts.datasets import ElectricityConsumptionZurichDataset
from darts.explainability import ShapExplainer

In [None]:
import pandas as pd

# Read the order data from the CSV file.
df = pd.read_csv('csv/Ming13Jun67_total_order.csv')

# Convert the order_completed_at column to datetime.
df['order_completed_at'] = pd.to_datetime(df['order_completed_at'])

# Show basic information about the DataFrame.
df.info()

In [None]:
# Print the first rows, then the last rows, of the loaded data as plain text.
for preview in (df.head(), df.tail()):
    print(preview)

In [None]:
from darts import TimeSeries

# Build a darts TimeSeries from df: order_completed_at is the time column,
# unique_order_count is the value column.
ts_order_quantity = TimeSeries.from_dataframe(df, 'order_completed_at', 'unique_order_count')

# Resample to daily frequency ("D").
ts_order_quantity = ts_order_quantity.resample(freq="D")

# Select the target component by name.
ts_order = ts_order_quantity["unique_order_count"]

# Create train and validation splits around the 2024-05-31 cutoff.
validation_cutoff = pd.Timestamp("2024-05-31")
ts_training, ts_validation = ts_order.split_after(validation_cutoff)
# NOTE(review): identical split of the same series as the line above, so these
# duplicate ts_training / ts_validation; they are not used in the visible cells.
ts_cov_train,ts_cov_test  = ts_order.split_after(validation_cutoff)

# Plot the full daily series.
plt.figure(figsize=(12, 6))  
ts_order.plot()
plt.show()



# Linear Regression

In [None]:
# Fit a linear regression that uses the previous 33 values (lags=33) of the
# training series as features.
model = LinearRegressionModel(lags=33)
model.fit(ts_training)
# Forecast 30 steps past the end of the training series.
pred = model.predict(30)

plt.figure(figsize=(20, 10)) 

# Overlay training data, validation data and the forecast on one figure.
ts_training.plot(label="training")
ts_validation.plot(label="validation")
pred.plot(label="forecast")


plt.show()

In [None]:
from darts.metrics import mape, mse, mae, rmse
# Forecast exactly as many steps as the validation series contains, so every
# metric below compares the forecast against the whole validation set.
pred_lr = model.predict(len(ts_validation))

# {33} is a literal echoing the lags value the model above was built with.
print(f'Linear Regression MAPE: lag {33} {mape(ts_validation, pred_lr)}')
print(f'MSE {mse(ts_validation, pred_lr)}')
print(f'MAE {mae(ts_validation, pred_lr)}')
print(f'RMSE {rmse(ts_validation, pred_lr)}')

In [None]:
# Predict
# Refit the linear regression on the full resampled series and forecast the
# next 7 steps.
lr_model_path = 'model/linearRegressionModel_totalOrder_13Jun67.pkl'
try:
    # NOTE: load() unpickles the file; only load model files you trust.
    best_model = LinearRegressionModel.load(lr_model_path)
except FileNotFoundError:
    # The pickle is only written by a later cell, so it does not exist on a
    # fresh "Restart & Run All". The model is refit right below, so only its
    # configuration matters; fall back to the lags used throughout this notebook.
    best_model = LinearRegressionModel(lags=33)
best_model.fit(ts_order_quantity)

# ts_order_quantity.pd_dataframe().tail()

# Turn the forecast into a two-column table: date and predicted order count.
forecast = best_model.predict(7)
forecast = forecast.pd_dataframe().reset_index()
forecast.columns = ['Date', 'Total Order']
forecast.tail(7)

In [None]:
from darts.models import LinearRegressionModel, Prophet

# Persist the current best_model to disk; the linear-regression predict cell
# loads this same path.
best_model.save("model/linearRegressionModel_totalOrder_13Jun67.pkl")

# Linear Single Model, Multi Model

In [None]:
from darts.metrics import mape, mse, mae, rmse

def find_best_chunk_len(len_chunk:int):
    """Find the output_chunk_length in 1..len_chunk with the lowest validation MAPE.

    For every candidate a LinearRegressionModel(lags=33, multi_models=True) is
    fitted on the module-level ``ts_training`` and scored with ``mape`` against
    the module-level ``ts_validation``.

    Args:
        len_chunk: Largest output_chunk_length to try (inclusive); must be >= 1.

    Returns:
        dict with keys ``"chunk"`` (best output_chunk_length) and ``"value"``
        (its MAPE on the validation series).

    Raises:
        ValueError: If ``len_chunk`` is smaller than 1.
    """
    if len_chunk < 1:
        raise ValueError("len_chunk must be >= 1")

    best_value = {"chunk":1, "value":0} #num chunk, error value (mape)
    for i in range(1, len_chunk+1):
        # Use the loop value, not the upper bound, so each candidate differs.
        multi_models = LinearRegressionModel(lags=33, output_chunk_length=i, multi_models=True)
        multi_models.fit(ts_training)
        pred_multi_models = multi_models.predict(len(ts_validation))

        # Score once per candidate instead of recomputing the metric.
        error = mape(ts_validation, pred_multi_models)
        if i == 1 or error < best_value["value"]:
            best_value["chunk"] = i
            best_value["value"] = error

    # Return only after every candidate has been evaluated.
    return best_value

# Try output chunk lengths 1..60 and report the best one.
out = find_best_chunk_len(60)
# Single quotes inside the f-string: reusing the outer double quotes is a
# SyntaxError on Python versions before 3.12.
print(f"chunk {out['chunk']} mape: {out['value']}")


In [None]:

# Fit the multi_models=True variant with a one-step output chunk and forecast
# the whole validation horizon.
# NOTE(review): with output_chunk_length=1 the multi_models flag likely makes no
# practical difference — confirm against the darts documentation.
multi_models = LinearRegressionModel(lags=33, output_chunk_length=1, multi_models=True)
multi_models.fit(ts_training)

pred_multi_models = multi_models.predict(len(ts_validation))

# Overlay training data, validation data and the forecast on one figure.
plt.figure(figsize=(20, 10)) 
ts_training.plot(label="training")
ts_validation.plot(label="validation")
pred_multi_models.plot(label="forecast (multi models)")

In [None]:
# Validation metrics for the multi-model forecast; {33} is a literal echoing
# the lags value the model was built with.
print(f'Linear Regression Multi Model MAPE: lag {33} {mape(ts_validation, pred_multi_models)}')
print(f'MSE {mse(ts_validation, pred_multi_models)}')
print(f'MAE {mae(ts_validation, pred_multi_models)}')
print(f'RMSE {rmse(ts_validation, pred_multi_models)}')

In [None]:
from darts.metrics import mape, mse, mae, rmse

def find_best_chunk_len(len_chunk:int):
    """Find the output_chunk_length in 1..len_chunk with the lowest validation MAPE.

    Single-model variant: for every candidate a
    LinearRegressionModel(lags=33, multi_models=False) is fitted on the
    module-level ``ts_training`` and scored with ``mape`` against the
    module-level ``ts_validation``.

    NOTE: this definition reuses the name of the multi-model search defined in
    an earlier cell and therefore replaces it once this cell has run.

    Args:
        len_chunk: Largest output_chunk_length to try (inclusive); must be >= 1.

    Returns:
        dict with keys ``"chunk"`` (best output_chunk_length) and ``"value"``
        (its MAPE on the validation series).

    Raises:
        ValueError: If ``len_chunk`` is smaller than 1.
    """
    if len_chunk < 1:
        raise ValueError("len_chunk must be >= 1")

    best_value = {"chunk":1, "value":0} #num chunk, error value (mape)
    for i in range(1, len_chunk+1):
        # Use the loop value, not the upper bound, so each candidate differs.
        single_models = LinearRegressionModel(lags=33, output_chunk_length=i, multi_models=False)
        single_models.fit(ts_training)
        pred_single_models = single_models.predict(len(ts_validation))

        # Compare and store the MAPE score, never the forecast series itself.
        error = mape(ts_validation, pred_single_models)
        if i == 1 or error < best_value["value"]:
            best_value["chunk"] = i
            best_value["value"] = error

    # Return only after every candidate has been evaluated.
    return best_value

# Try output chunk lengths 1..60 and report the best one.
out = find_best_chunk_len(60)
# Single quotes inside the f-string: reusing the outer double quotes is a
# SyntaxError on Python versions before 3.12.
print(f"chunk {out['chunk']} mape: {out['value']}")


In [None]:

# Fit the multi_models=False variant with a one-step output chunk and forecast
# the whole validation horizon.
single_models = LinearRegressionModel(lags=33, output_chunk_length=1, multi_models=False)
single_models.fit(ts_training)

pred_single_models = single_models.predict(len(ts_validation))

# Overlay training data, validation data and the forecast on one figure.
plt.figure(figsize=(20, 10))
ts_training.plot(label="training")
ts_validation.plot(label="validation")
# The legend names the model actually plotted (it previously said "multi models").
pred_single_models.plot(label="forecast (single model)")

In [None]:
# Validation metrics for the single-model forecast; {33} is a literal echoing
# the lags value the model was built with.
print(f'Linear Regression Single Model MAPE: lag {33} {mape(ts_validation, pred_single_models)}')
print(f'MSE {mse(ts_validation, pred_single_models)}')
print(f'MAE {mae(ts_validation, pred_single_models)}')
print(f'RMSE {rmse(ts_validation, pred_single_models)}')

# Random Forest Model

In [None]:
from darts.models import (
    LinearRegressionModel,
    RegressionModel,
    LightGBMModel,
    XGBModel,
    CatBoostModel,
    RandomForest
)

# Search the number of lags (1..30) for a Random Forest and keep the candidate
# with the lowest validation MAPE.
best_lag = None
error_best = None
for i in range(1, 31):
    candidate_Rforest = RandomForest(lags=i)
    candidate_Rforest.fit(ts_training)

    # Predict over the validation horizon.
    candidate_pred = candidate_Rforest.predict(len(ts_validation))

    # Compute the MAPE (Mean Absolute Percentage Error).
    error = mape(ts_validation, candidate_pred)
    if error_best is None or error < error_best:
        error_best = error
        best_lag = i
        # Keep the fitted winner itself. Refitting a new forest with the same
        # lags and no fixed random_state would give a different model whose
        # metrics need not match the score that selected it.
        model_Rforest = candidate_Rforest
        pred_Rforest = candidate_pred
print(f'Random Forest MAPE: lag {best_lag} MAPE {error_best}\n')

# Full metric report for the selected model.
print(f'Train Random Forest MAPE: lag {best_lag} MAPE {mape(ts_validation, pred_Rforest)}')
print(f'MSE {mse(ts_validation, pred_Rforest)}')
print(f'MAE {mae(ts_validation, pred_Rforest)}')
print(f'RMSE {rmse(ts_validation, pred_Rforest)}')

In [None]:
# Search max_depth (1..30) for a Random Forest at a fixed number of lags and
# keep the candidate with the lowest validation MAPE.
rf_lags = 27  # fixed lags for this search
best_depth = None
error_best = None
for i in range(1, 31):
    candidate_Rforest = RandomForest(lags=rf_lags, max_depth=i)
    candidate_Rforest.fit(ts_training)

    # Predict over the validation horizon.
    candidate_pred = candidate_Rforest.predict(len(ts_validation))

    # Compute the MAPE (Mean Absolute Percentage Error).
    error = mape(ts_validation, candidate_pred)
    if error_best is None or error < error_best:
        error_best = error
        best_depth = i
        # Keep the fitted winner itself. Refitting a new forest with the same
        # settings and no fixed random_state would give a different model whose
        # metrics need not match the score that selected it.
        model_Rforest = candidate_Rforest
        pred_Rforest = candidate_pred
    # Progress line: best depth and score seen so far.
    print(f'Random Forest MAPE: depth {best_depth} MAPE {error_best}\n')

# Full metric report for the selected model.
print(f'Train Random Forest MAPE: depth {best_depth} MAPE {mape(ts_validation, pred_Rforest)}')
print(f'MSE {mse(ts_validation, pred_Rforest)}')
print(f'MAE {mae(ts_validation, pred_Rforest)}')
print(f'RMSE {rmse(ts_validation, pred_Rforest)}')

In [None]:
# # สร้างและฝึก Linear Regression Model
# best_est_n = 100
# for i in [20,50,100,150,200,300,400,500,600,700,800,900,1000]:
#     model_Rforest = RandomForest(lags=1, max_depth=9, n_estimators=i)
#     model_Rforest.fit(ts_training)

#     # ทำนายค่าใน validation set
#     pred_Rforest = model_Rforest.predict(len(ts_validation))

#     # คำนวณค่า MAPE (Mean Absolute Percentage Error)
#     error = mape(ts_validation, pred_Rforest)
#     if i==20:
#         error_best_est = error
#     else:
#         if error < error_best_est:
#             error_best_est = error; best_est_n = i

# print(f'Linear Regression MAPE:  {best_est_n}  max depth {error_best_est}')
# print(f'MSE {mse(ts_validation, pred_Rforest)}')
# print(f'MAE {mae(ts_validation, pred_Rforest)}')
# print(f'RMSE {rmse(ts_validation, pred_Rforest)}')

# Search n_estimators for a Random Forest at fixed lags and max_depth and keep
# the candidate with the lowest validation MAPE.
rf_lags = 27       # fixed lags for this search
rf_max_depth = 28  # fixed max_depth for this search
best_est = None
error_best = None
for i in [100,200,300,400,500,600,700,800,900,1000]:
    candidate_Rforest = RandomForest(lags=rf_lags, max_depth=rf_max_depth, n_estimators=i)
    candidate_Rforest.fit(ts_training)

    # Predict over the validation horizon.
    candidate_pred = candidate_Rforest.predict(len(ts_validation))

    # Compute the MAPE (Mean Absolute Percentage Error).
    error = mape(ts_validation, candidate_pred)
    if error_best is None or error < error_best:
        error_best = error
        best_est = i
        # Keep the fitted winner itself. The previous code rebuilt the final
        # model with lags=26 although the search ran with lags=27, so the
        # reported model was not the one that had been selected.
        model_Rforest = candidate_Rforest
        pred_Rforest = candidate_pred
    # Progress line: best n_estimators and score seen so far.
    print(f'Random Forest MAPE: n_est {best_est} MAPE {error_best}\n')

# Full metric report for the selected model.
print(f'Train Random Forest MAPE: n_est {best_est} MAPE {mape(ts_validation, pred_Rforest)}')
print(f'MSE {mse(ts_validation, pred_Rforest)}')
print(f'MAE {mae(ts_validation, pred_Rforest)}')
print(f'RMSE {rmse(ts_validation, pred_Rforest)}')

In [None]:
# # note random forest lag=1, max_depth=7, n_estimators=200
# lags_in = 27
# max_depths_in = 28
# n_estimators_in = 1000

# Rmodel = RandomForest(lags=lags_in, 
#                       max_depth=max_depths_in, 
#                       n_estimators=n_estimators_in
#                       )
# Rmodel.fit(ts_training)
# Forecast 30 steps with model_Rforest as left in the kernel by the Random
# Forest search cells, then plot it against the training and validation data.
pred = model_Rforest.predict(30)

plt.figure(figsize=(20, 10)) 

ts_training.plot(label="training")
ts_validation.plot(label="validation")
pred.plot(label="forecast")


In [None]:
from darts.metrics import mape, mse, mae, rmse
# Hyperparameters of the Random Forest that is fitted repeatedly below.
lags_in = 26
max_depths_in = 9
n_estimators_in = 400

# Fit the same configuration 30 times and keep the fit with the lowest
# validation MAPE. No random_state is set, so each fit can differ.
# NOTE(review): picking the best fit on the validation set makes the reported
# score optimistic — confirm this is intended.
# Start from +inf so the first fit is always accepted; with the old sentinel of
# 100, Best_model stayed undefined whenever every MAPE was 100 or more.
best_mape = float("inf")
for i in range(1,31):
    Rmodel = RandomForest(lags=lags_in,
                        max_depth=max_depths_in,
                        n_estimators=n_estimators_in
                        )
    Rmodel.fit(ts_training)
    # NOTE: pred_lr is reused here for Random Forest predictions and overwrites
    # the linear-regression forecast stored under the same name.
    pred_lr = Rmodel.predict(len(ts_validation))
    mape_value = mape(ts_validation, pred_lr)

    if mape_value < best_mape:
        Best_model = Rmodel
        best_mape = mape_value
        print(f'\nRandom Forest MAPE: lag {lags_in} max_depth {max_depths_in}  n_estimator {n_estimators_in} MAPE {mape_value}')
        print(f'MSE {mse(ts_validation, pred_lr)}')
        print(f'MAE {mae(ts_validation, pred_lr)}')
        print(f'RMSE {rmse(ts_validation, pred_lr)}\n')
    # "epoch" here is the index of the repeated fit, not a training epoch.
    print(f'epoch {i}')

In [None]:
from darts.models import LinearRegressionModel, Prophet

# Persist the best Random Forest kept by the repeated-fit loop.
Best_model.save("model/RandomForestModel_totalOrder_13Jun67_V1_1.pkl")

In [None]:
# Predict
# Reload the pickled Random Forest and refit it on the full resampled series
# (training and validation periods together).
# NOTE: load() unpickles the file; only load model files you trust.
best_model= RandomForest.load("model/RandomForestModel_totalOrder_13Jun67_V1_1.pkl")
best_model.fit(ts_order_quantity)

# ts_order_quantity.pd_dataframe().tail()

In [None]:
# Forecast the next 7 steps and present them as a two-column table
# (date, predicted order count).
forecast = best_model.predict(7).pd_dataframe().reset_index()
forecast.columns = ['Date', 'Total Order']
forecast.tail(7)

In [None]:
from darts.models import LinearRegressionModel, Prophet

# Overwrite the Random Forest pickle with the model refit on the full series.
best_model.save("model/RandomForestModel_totalOrder_13Jun67_V1_1.pkl")

# CatBoostModel

In [None]:
from darts.models import (
    CatBoostModel
)

# Search the number of lags (1..30) for a CatBoost model and remember the
# forecast of the best candidate.
best_lag = 1
for i in range(1,31):
    model_Cat = CatBoostModel(lags=i)
    model_Cat.fit(ts_training)

    # Predict over the validation horizon.
    pred_Cat = model_Cat.predict(len(ts_validation))

    # Compute the MAPE (Mean Absolute Percentage Error).
    error_lr = mape(ts_validation, pred_Cat)
    if i == 1 or error_lr < error_best_lag:
        error_best_lag = error_lr
        best_lag = i
        # Remember the winning forecast so all metrics below describe the same
        # model; pred_Cat itself ends up holding the last (lag 30) forecast.
        best_pred_Cat = pred_Cat

# The headline previously said "Linear Regression" although this is CatBoost.
print(f'CatBoost MAPE: lag {best_lag} {error_best_lag}')
print(f'MSE {mse(ts_validation, best_pred_Cat)}')
print(f'MAE {mae(ts_validation, best_pred_Cat)}')
print(f'RMSE {rmse(ts_validation, best_pred_Cat)}')