In [1]:
from google.colab import files
uploaded = files.upload()
import pandas as pd

# EDA sương sương
df = pd.read_csv('supply_chain_deliveries.csv')
df.info()
df.describe()
df.isnull()

# Chuyển cột workdate thành datetime
df['WorkDate'] = pd.to_datetime(df['WorkDate'], errors='coerce')

# Set trục thời gian để resample
df = df.set_index('WorkDate')

# Resample theo tuần
weekly_data = df['NumberOfPieces'].resample('W').sum()
# Resample theo ngày, theo tháng (chỉ để đối chiếu, không phải mục tiêu)
daily_data = df['NumberOfPieces'].resample('D').sum()
monthly_data = df['NumberOfPieces'].resample('M').sum()

# Group theo Customer & Location
weekly_customer_location = df.groupby(['Customer', 'Location'])['NumberOfPieces'].resample('W').sum().reset_index()

# Chia dữ liệu Train - Test (2020 - 2024 train, 2025 test)
train = weekly_data[:'2024']
test = weekly_data['2025':]

# Baseline Model: Chọn Naive Forecast, Exponential Smoothing (Holt-Winters)
## Naive Forecast
naive_forecast = test.copy()
naive_forecast[:] = train.iloc[-1]
## Exponential Smoothing (Holt-Winters)
from statsmodels.tsa.holtwinters import ExponentialSmoothing
hw_model = ExponentialSmoothing(train, trend= 'add', seasonal= 'add', seasonal_periods= 52).fit()
hw_forecast = hw_model.forecast(len(test))
print(hw_forecast.head())

# Time-series Model: SARIMA, Prophet
## SARIMA
import itertools
import warnings
from statsmodels.tsa.statespace.sarimax import SARIMAX
warnings.filterwarnings('ignore')
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
P = D = Q = [0, 1]
seasonal_pdq = [(0,1,0,52), (1,1,0,52)]
model = SARIMAX(train, order=(1,0,1), seasonal_order=(1,1,0,52))
results = model.fit(disp=False)
print(results.summary())
best_aic = float('inf')
best_param= None
for param in pdq:
  for param_seasonal in seasonal_pdq:
    try:
      model = SARIMAX(train, order= param, seasonal_order= param_seasonal, enforce_stationarity=False, enforce_invertibility= False)
      results = model.fit(disp= False)
      if results.aic < best_aic:
        best_aic = results.aic
        best_param = (param, param_seasonal)
    except:
      continue
print('Best SARIMA params:', best_param, 'AIC:', best_aic)
best_order, best_seasonal = best_param
sarima_model = SARIMAX(train, order= best_order, seasonal_order= best_seasonal, enforce_stationarity=False, enforce_invertibility=False).fit()
print(sarima_model.summary())
sarima_forecast = sarima_model.forecast(len(test))
from sklearn.metrics import mean_absolute_percentage_error
mape = mean_absolute_percentage_error(test, sarima_forecast)
print('MAPE:', mape)
## Prophet
from prophet import Prophet
prophet_df = weekly_data.reset_index()[['WorkDate', 'NumberOfPieces']]
prophet_df.columns = ['ds', 'y']
train_prophet = prophet_df[prophet_df['ds'] < '2025-01-01']
test_prophet = prophet_df[prophet_df['ds'] >= '2025-01-01']
model = Prophet(yearly_seasonality= True, weekly_seasonality= False, daily_seasonality= False)
model.fit(train_prophet)
future = model.make_future_dataframe(periods= len(test_prophet), freq= 'W')
forecast = model.predict(future)
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
from prophet.plot import plot_plotly, plot_components_plotly
fig1 = plot_plotly(model, forecast)
fig1.show()
fig2 = plot_components_plotly(model, forecast)
fig2.show()
from sklearn.metrics import mean_absolute_percentage_error
prophet_pred = forecast[forecast['ds'] >= '2025-01-01']['yhat'].values
prophet_actual = test_prophet['y'].values
mape_prophet = mean_absolute_percentage_error(prophet_actual, prophet_pred)
print('Prophet MAPE:', mape_prophet)

Saving supply_chain_deliveries.csv to supply_chain_deliveries.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126255 entries, 0 to 126254
Data columns (total 7 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   WorkDate        126255 non-null  object 
 1   Customer        126255 non-null  object 
 2   Location        126255 non-null  object 
 3   BusinessType    126255 non-null  object 
 4   OrderCount      126255 non-null  int64  
 5   NumberOfPieces  126255 non-null  int64  
 6   TotalRevenue    126255 non-null  float64
dtypes: float64(1), int64(2), object(4)
memory usage: 6.7+ MB


  monthly_data = df['NumberOfPieces'].resample('M').sum()


2025-01-05     92577.637401
2025-01-12     94165.579538
2025-01-19     99756.980768
2025-01-26     98129.037525
2025-02-02    101605.095724
Freq: W-SUN, dtype: float64
                                      SARIMAX Results                                      
Dep. Variable:                      NumberOfPieces   No. Observations:                  261
Model:             SARIMAX(1, 0, 1)x(1, 1, [], 52)   Log Likelihood               -2094.966
Date:                             Tue, 02 Sep 2025   AIC                           4197.932
Time:                                     07:32:22   BIC                           4211.302
Sample:                                 01-05-2020   HQIC                          4203.338
                                      - 12-29-2024                                         
Covariance Type:                               opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
-----------------

DEBUG:cmdstanpy:input tempfile: /tmp/tmp8mehsb_d/3s4is8y_.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp8mehsb_d/cyskn1pf.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.12/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=56046', 'data', 'file=/tmp/tmp8mehsb_d/3s4is8y_.json', 'init=/tmp/tmp8mehsb_d/cyskn1pf.json', 'output', 'file=/tmp/tmp8mehsb_d/prophet_model4r1ca2bg/prophet_model-20250902073348.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
07:33:48 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
07:33:48 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


            ds          yhat    yhat_lower    yhat_upper
283 2025-06-08  52145.253811  45548.190358  58511.890926
284 2025-06-15  53556.820779  46972.660169  59798.037728
285 2025-06-22  52878.234450  46493.467023  59482.243055
286 2025-06-29  50693.806700  44025.891661  57301.731136
287 2025-07-06  49010.561802  42090.691949  55511.600082


Prophet MAPE: 0.18947000787737084
