In [1]:
#@title INSTALL PACKAGES
!pip install statsforecast mlforecast neuralforecast dask-expr

Collecting statsforecast
  Downloading statsforecast-1.7.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (28 kB)
Collecting mlforecast
  Downloading mlforecast-0.13.5-py3-none-any.whl.metadata (12 kB)
Collecting neuralforecast
  Downloading neuralforecast-1.7.5-py3-none-any.whl.metadata (14 kB)
Collecting dask-expr
  Downloading dask_expr-1.1.16-py3-none-any.whl.metadata (2.5 kB)
Collecting cloudpickle (from statsforecast)
  Downloading cloudpickle-3.1.0-py3-none-any.whl.metadata (7.0 kB)
Collecting coreforecast>=0.0.12 (from statsforecast)
  Downloading coreforecast-0.0.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting numba>=0.55.0 (from statsforecast)
  Downloading numba-0.60.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.7 kB)
Collecting statsmodels>=0.13.2 (from statsforecast)
  Downloading statsmodels-0.14.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.2 kB)
Collec

In [None]:
!pip install lightgbm

In [11]:
#@title IMPORT LIBRARY
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime

from tqdm import tqdm
from utilsforecast.plotting import plot_series

from statsforecast.core import StatsForecast
from statsforecast.models import AutoARIMA, Naive

from statsforecast.models import *

from mlforecast import MLForecast

import lightgbm as lgb
from mlforecast.target_transforms import Differences
from mlforecast.lag_transforms import ExpandingMean, RollingMean, ExpandingMax, ExpandingMin, ExpandingStd

from utilsforecast.evaluation import evaluate
from utilsforecast.losses import rmse, mse
from mlforecast.utils import generate_daily_series, generate_prices_for_series

os.environ['NIXTLA_ID_AS_COL'] = '1'
pd.options.mode.chained_assignment = None


In [4]:
#@title BASE FUNCTIONS
def retrieve_data(sales_dir, price_dir):
    sales_raw = pd.read_csv(sales_dir)
    price_raw = pd.read_csv(price_dir)

    cols = sales_raw.columns
    dates = cols[3:]

    sales = sales_raw.melt(id_vars=cols[:3], var_name='Date', value_vars=dates, value_name='Sales')
    price = price_raw.melt(id_vars=cols[:3], var_name='Date', value_vars=dates, value_name='Sales')

    sales['unique_id'] = sales['Client'].astype(str) + '-' + sales['Warehouse'].astype(str) + '-' + sales['Product'].astype(str)
    price['unique_id'] = price['Client'].astype(str) + '-' + price['Warehouse'].astype(str) + '-' + price['Product'].astype(str)

    sales = sales.rename(columns={'Date': 'ds', 'Sales': 'y'})
    price = price.rename(columns={'Date': 'ds', 'Sales': 'price'})
    sales['ds'] = pd.to_datetime(sales['ds'])
    price['ds'] = pd.to_datetime(price['ds'])

    return sales, price

def mean_impute(series):
    series = series.fillna(series.rolling(window=3, min_periods=1, center=True).mean())
    series = series.ffill()
    series = series.bfill()

    return series

In [5]:
#@title VARIABLE SETTINGS

sales_dir = '../phase_0/Phase 0 - Sales.csv'
price_dir = '../phase_0/Phase 0 - Price.csv'
sales, price = retrieve_data(sales_dir, price_dir)
price['price'] = mean_impute(price['price'])

freq = 'W-MON'
horizon=13
season_length = 52

In [7]:
sales_dir = '../phase_1/Phase 1 - Sales.csv'
price_dir = '../phase_1/Phase 1 - Price.csv'

sales_phase_1, price = retrieve_data(sales_dir, price_dir)

sales_phase_1

Unnamed: 0,Client,Warehouse,Product,ds,y,unique_id
0,0,1,367,2023-10-09,1.0,0-1-367
1,0,1,639,2023-10-09,5.0,0-1-639
2,0,1,655,2023-10-09,6.0,0-1-655
3,0,1,1149,2023-10-09,1.0,0-1-1149
4,0,1,1485,2023-10-09,0.0,0-1-1485
...,...,...,...,...,...,...
195684,46,318,13485,2024-01-01,0.0,46-318-13485
195685,46,318,13582,2024-01-01,67.0,46-318-13582
195686,46,318,13691,2024-01-01,2.0,46-318-13691
195687,46,318,13946,2024-01-01,0.0,46-318-13946


In [8]:
sales = pd.concat([sales, sales_phase_1], axis=0)
sales

Unnamed: 0,Client,Warehouse,Product,ds,y,unique_id
0,0,1,367,2020-07-06,7.0,0-1-367
1,0,1,639,2020-07-06,0.0,0-1-639
2,0,1,655,2020-07-06,21.0,0-1-655
3,0,1,1149,2020-07-06,7.0,0-1-1149
4,0,1,1485,2020-07-06,0.0,0-1-1485
...,...,...,...,...,...,...
195684,46,318,13485,2024-01-01,0.0,46-318-13485
195685,46,318,13582,2024-01-01,67.0,46-318-13582
195686,46,318,13691,2024-01-01,2.0,46-318-13691
195687,46,318,13946,2024-01-01,0.0,46-318-13946


In [None]:
#@title THETA MODEL
sf = StatsForecast(
    # models=[Theta(season_length=52, decomposition_type="additive")],
    models=[DynamicTheta(season_length=52, decomposition_type="additive")],
    # models=[ADIDA()],
    freq='W-MON',
    fallback_model = Naive(),
    n_jobs=-1,
)

# forecast_df = sf.forecast(df=sales, h=13)
sf.fit(df=sales)
forecast_df = sf.predict(h=horizon)

In [None]:
#@title MFLES MODEL
seasonal_period = 52

config = {
    'seasonality_weights': [True, False],
    'smoother': [True, False],
    'ma': [52, 12, 8, 4, None]
}

# model = AutoMFLES(season_length=52, test_size=17, n_windows=3, metric='mse', config=config)
model = AutoMFLES(season_length=52, test_size=13, n_windows=4, metric='mse', config=config)

sf = StatsForecast(
    models=[model],
    fallback_model = Naive(),
    freq='W-MON',
    n_jobs=-1,
    verbose=True
)

sf.fit(df = sales)

# Generate test predictions
forecast_df = sf.predict(h=horizon)

In [None]:
#@title MAKE SUBMISSION
def make_submission(forecast_df):
    submission = forecast_df.copy()
    values = forecast_df.columns[-1]

    submission[values] = submission[values].clip(0)
    submission[['Client', 'Warehouse', 'Product']] = submission['unique_id'].str.split('-', expand=True)
    submission = submission.drop(columns='unique_id')
    submission['ds'] = submission['ds'].astype(str)

    submission = submission.pivot(columns='ds', index=['Client', 'Warehouse', 'Product'], values=values).reset_index()
    submission[['Client', 'Warehouse', 'Product']] = submission[['Client', 'Warehouse', 'Product']].astype(int)
    submission = submission.sort_values(['Client', 'Warehouse', 'Product'])

    # submission = submission.map(lambda x: x if x>=0 else 0)

    current_datetime = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    submission.to_csv('../mfles2.csv', index=None)

    return submission

submission = make_submission(forecast_df)