In [None]:
#@title INSTALL PACKAGES
!pip install statsforecast mlforecast neuralforecast dask-expr

In [1]:
#@title IMPORT LIBRARY
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime

from tqdm import tqdm
from utilsforecast.plotting import plot_series

from statsforecast.core import StatsForecast
from statsforecast.models import AutoARIMA, Naive

from statsforecast.models import *

from mlforecast import MLForecast

import lightgbm as lgb
from mlforecast.target_transforms import Differences
from mlforecast.lag_transforms import ExpandingMean, RollingMean, ExpandingMax, ExpandingMin, ExpandingStd

from utilsforecast.evaluation import evaluate
from utilsforecast.losses import rmse, mse
from mlforecast.utils import generate_daily_series, generate_prices_for_series

os.environ['NIXTLA_ID_AS_COL'] = '1'
pd.options.mode.chained_assignment = None


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#@title BASE FUNCTIONS
def retrieve_data(sales_dir, price_dir):
    sales_raw = pd.read_csv(sales_dir)
    price_raw = pd.read_csv(price_dir)

    cols = sales_raw.columns
    dates = cols[3:]

    sales = sales_raw.melt(id_vars=cols[:3], var_name='Date', value_vars=dates, value_name='Sales')
    price = price_raw.melt(id_vars=cols[:3], var_name='Date', value_vars=dates, value_name='Sales')

    sales['unique_id'] = sales['Client'].astype(str) + '-' + sales['Warehouse'].astype(str) + '-' + sales['Product'].astype(str)
    price['unique_id'] = price['Client'].astype(str) + '-' + price['Warehouse'].astype(str) + '-' + price['Product'].astype(str)

    sales = sales.rename(columns={'Date': 'ds', 'Sales': 'y'})
    price = price.rename(columns={'Date': 'ds', 'Sales': 'price'})
    sales['ds'] = pd.to_datetime(sales['ds'])
    price['ds'] = pd.to_datetime(price['ds'])

    return sales, price

def mean_impute(series):
    series = series.fillna(series.rolling(window=3, min_periods=1, center=True).mean())
    series = series.ffill()
    series = series.bfill()

    return series


In [3]:
#@title VARIABLE SETTINGS

sales_dir = '../phase_0/Phase 0 - Sales.csv'
price_dir = '../phase_0/Phase 0 - Price.csv'
sales, price = retrieve_data(sales_dir, price_dir)
price['price'] = mean_impute(price['price'])

freq = 'W-MON'
horizon=13
season_length = 52

In [4]:
sales_dir = '../phase_1/Phase 1 - Sales.csv'
price_dir = '../phase_1/Phase 1 - Price.csv'

sales_phase_1, price = retrieve_data(sales_dir, price_dir)

sales_phase_1

Unnamed: 0,Client,Warehouse,Product,ds,y,unique_id
0,0,1,367,2023-10-09,1.0,0-1-367
1,0,1,639,2023-10-09,5.0,0-1-639
2,0,1,655,2023-10-09,6.0,0-1-655
3,0,1,1149,2023-10-09,1.0,0-1-1149
4,0,1,1485,2023-10-09,0.0,0-1-1485
...,...,...,...,...,...,...
195684,46,318,13485,2024-01-01,0.0,46-318-13485
195685,46,318,13582,2024-01-01,67.0,46-318-13582
195686,46,318,13691,2024-01-01,2.0,46-318-13691
195687,46,318,13946,2024-01-01,0.0,46-318-13946


In [5]:
sales = pd.concat([sales, sales_phase_1], axis=0)
sales

Unnamed: 0,Client,Warehouse,Product,ds,y,unique_id
0,0,1,367,2020-07-06,7.0,0-1-367
1,0,1,639,2020-07-06,0.0,0-1-639
2,0,1,655,2020-07-06,21.0,0-1-655
3,0,1,1149,2020-07-06,7.0,0-1-1149
4,0,1,1485,2020-07-06,0.0,0-1-1485
...,...,...,...,...,...,...
195684,46,318,13485,2024-01-01,0.0,46-318-13485
195685,46,318,13582,2024-01-01,67.0,46-318-13582
195686,46,318,13691,2024-01-01,2.0,46-318-13691
195687,46,318,13946,2024-01-01,0.0,46-318-13946


In [None]:
#@title THETA MODEL
sf = StatsForecast(
    # models=[Theta(season_length=52, decomposition_type="additive")],
    models=[DynamicTheta(season_length=52, decomposition_type="additive")],
    # models=[ADIDA()],
    freq='W-MON',
    fallback_model = Naive(),
    n_jobs=-1,
)

# forecast_df = sf.forecast(df=sales, h=13)
sf.fit(df=sales)
forecast_df = sf.predict(h=horizon)

In [None]:
#@title MFLES MODEL
seasonal_period = 52

config = {
    'seasonality_weights': [True, False],
    'smoother': [True, False],
    'ma': [52, 26, 12, 8, 4, None],
    'seasonal_period': [None, seasonal_period],
}

model = AutoMFLES(season_length=52, test_size=17, n_windows=3, metric='mse', config=config)

sf = StatsForecast(
    models=[model],
    fallback_model = Naive(),
    freq='W-MON',
    n_jobs=-1,
    verbose=True
)

sf.fit(df = sales)

# Generate test predictions
forecast_df = sf.predict(h=horizon)

In [None]:
#@title MAKE SUBMISSION
def make_submission(forecast_df):
    submission = forecast_df.copy()
    values = forecast_df.columns[-1]

    submission[values] = submission[values].clip(0)
    submission[['Client', 'Warehouse', 'Product']] = submission['unique_id'].str.split('-', expand=True)
    submission = submission.drop(columns='unique_id')
    submission['ds'] = submission['ds'].astype(str)

    submission = submission.pivot(columns='ds', index=['Client', 'Warehouse', 'Product'], values=values).reset_index()
    submission[['Client', 'Warehouse', 'Product']] = submission[['Client', 'Warehouse', 'Product']].astype(int)
    submission = submission.sort_values(['Client', 'Warehouse', 'Product'])

    # submission = submission.map(lambda x: x if x>=0 else 0)

    current_datetime = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
    submission.to_csv('../mfles1.csv', index=None)

    return submission

submission = make_submission(forecast_df)

In [None]:
factor1 = pd.read_csv('/content/drive/MyDrive/sales-forecasting/phase_2/2024_10_17_12_21_46_DynamicTheta.csv').set_index(['Client', 'Warehouse', 'Product'])
factor2 = pd.read_csv('/content/drive/MyDrive/sales-forecasting/phase_2/2024_10_17_13_43_10_LightGBM.csv').set_index(['Client', 'Warehouse', 'Product'])
factor3 = pd.read_csv('/content/drive/MyDrive/sales-forecasting/phase_2/2024_10_17_14_30_20_AutoMFLES.csv').set_index(['Client', 'Warehouse', 'Product'])
factor4 = pd.read_csv('/content/drive/MyDrive/sales-forecasting/phase_2/2024_10_17_14_53_25_AutoMFLES.csv').set_index(['Client', 'Warehouse', 'Product'])

In [None]:
factor4.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,2024-01-08,2024-01-15,2024-01-22,2024-01-29,2024-02-05,2024-02-12,2024-02-19,2024-02-26,2024-03-04,2024-03-11,2024-03-18,2024-03-25,2024-04-01
Client,Warehouse,Product,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,1,367,2.98438,3.433777,3.434645,3.110621,2.78669,2.638494,2.652942,2.806055,3.100078,3.405451,3.42907,2.992352,2.317542
0,1,639,6.242716,7.111948,6.498316,5.473026,5.169234,5.675168,6.221362,6.216154,5.825238,5.554923,5.554262,5.532687,5.2534
0,1,655,0.0,0.091226,1.180747,1.267598,0.945629,0.756104,0.684089,0.569054,0.574125,0.944295,1.466028,1.54463,0.919043
0,1,1149,3.304974,3.462181,3.358088,3.043256,2.871192,3.102433,3.5826,3.893397,3.809855,3.524749,3.386408,3.484659,3.583269
0,1,1485,2.145081,1.375713,0.216742,0.18026,0.689664,0.230233,0.0,0.0,2.663412,6.513983,7.509914,4.672852,0.809471


In [None]:
factor = ((3*factor2).add(factor1).add(factor3).add(factor4) / 6).reset_index()
factor.head()

Unnamed: 0,Client,Warehouse,Product,2024-01-08,2024-01-15,2024-01-22,2024-01-29,2024-02-05,2024-02-12,2024-02-19,2024-02-26,2024-03-04,2024-03-11,2024-03-18,2024-03-25,2024-04-01
0,0,1,367,1.13521,1.19079,1.200815,1.225646,1.180567,1.17088,1.193142,1.246179,1.26072,1.323287,1.321253,1.235492,1.117951
1,0,1,639,4.260584,4.174735,4.643793,4.049952,4.294598,4.429044,4.893744,4.741255,4.424594,4.660938,4.528183,4.704147,4.529294
2,0,1,655,2.192745,2.462009,2.712097,3.226722,2.901257,2.750104,3.114576,3.291928,3.086705,3.489718,3.635651,3.560831,3.505229
3,0,1,1149,1.56835,1.419299,1.389156,1.378145,1.437221,1.495928,1.512584,1.607132,1.539555,1.569974,1.468075,1.463864,1.501505
4,0,1,1485,3.311561,3.653183,3.858132,4.511695,4.642169,4.372427,4.419312,4.848599,6.798932,8.344898,9.365561,7.849181,6.355713


In [None]:
factor.to_csv('final_submission_phase_2_hell_yeah.csv', index=False)