In [144]:
import numpy as np
import pandas as pd

import datetime
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GroupKFold

from scipy import stats
from sklearn import preprocessing 
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.scorer import make_scorer
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scipy.stats import randint as sp_randint
from scipy.stats import uniform as sp_uniform
from statsmodels.tsa.stattools import adfuller

import statsmodels
from sklearn.compose import ColumnTransformer
import statsmodels.api as sm
from statsmodels.formula.api import ols

import matplotlib.pylab as plt
%matplotlib inline
import seaborn as sns
from pandas import Timestamp

from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.svm import SVR
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt 
from tbats import TBATS, BATS

import xgboost as xgb
from xgboost import XGBRegressor 
from xgboost.sklearn import XGBClassifier # sklearn’s Grid Search with parallel processing
from xgboost import plot_importance
from sklearn.model_selection import GridSearchCV
import lightgbm as lgb
from catboost import CatBoostRegressor
from pmdarima.arima import auto_arima
pd.set_option('display.max_rows',None)

import itertools

import warnings
warnings.filterwarnings('ignore')

In [11]:
data = pd.read_csv('DF_File_sample.csv')
data['SKU_Customer'] = data['DemandCustomer'] + '_' + data['SKU10']
data_labels = data['SKU_Customer'] 
data.drop(['DemandCustomer', 'SKU10'], axis =1, inplace = True)

In [12]:
data['year_week'] = data['Year'].astype(str) + '-' + data['Week_No'].astype(str)
data['date'] = data['year_week'].apply(lambda x: datetime.datetime.strptime(x + '-4',  "%G-%V-%w"))

In [13]:
first_null = data.groupby('date').Sales.sum().loc[lambda x: x == 0].sort_values().index[0]
data = data[data.date < first_null]

first_26_week = pd.Series(sorted(data['date'].unique())).iloc[-26]
# first_34_week = pd.Series(sorted(data['date'].unique())).iloc[-34]

data.set_index('date', inplace = True)
data = data.iloc[np.lexsort((data.SKU_Customer.values, data.index)), [5, 7]]

In [15]:
train = data[data.index < first_26_week]
test = data[data.index >= first_26_week]

In [None]:
perc = 25
lenth = int(len(train)*(perc/100))
valid = train.tail(lenth)
Train = train.tail(-lenth)

## Modeling:

In [104]:
def mape(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return (np.mean(np.abs(y_true - y_pred)) / np.mean(y_true)) * 100    

In [106]:
def mape_by_month(y_true, y_pred): 
    y_true2 = pd.DataFrame(y_true.copy())
    y_true2['day'] = pd.to_datetime(y_true2.index, format='%d-%m-%Y',infer_datetime_format=True)
    y_true2['month'] = y_true2.day.dt.month
    y_true2['year'] = y_true2.day.dt.year  
    y_true2['Sales_pred'] = np.where(y_pred < 0, 0, y_pred)
    pred = y_true2.groupby(['SKU_Customer', 'year', 'month']).Sales_pred.sum().values
    true = y_true2.groupby(['SKU_Customer', 'year', 'month']).Sales.sum().values
    return mape(true, pred)

### Avto_ARIMA:

In [58]:
SKU_Customer_list = data.SKU_Customer.unique()

In [59]:
period = 26
previous_df = pd.DataFrame()
forecast_dates = pd.to_datetime(test.index.unique().sort_values().values, format = '%Y-%m-%d')

for i in SKU_Customer_list:
    model = auto_arima(train[train.SKU_Customer == i].Sales, 
                         start_p = 1,
                         start_q = 1,
                         max_p = 3,
                         max_q = 3,
                         m=12,
                         start_P = 0,
                         seasonal = True,
                         d =1,
                         D = 1,
                         trace=True,
                         error_action='ignore',  
                         suppress_warnings=True, 
                         stepwise=True)
    
    model.fit(train[train.SKU_Customer == i].Sales)

    future_forecast, confint  = model.predict(n_periods = period, return_conf_int=True)
    last_df = pd.DataFrame({'date': forecast_dates, 
                            'SKU_Customer': i,
                            'prediction': future_forecast,
                            'conf_low': confint[ :,0],
                            'conf_upper': confint[ :,1]})
    
    previous_df = pd.concat([previous_df, last_df], ignore_index = True)  

Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=3091.775, BIC=3107.877, Fit time=1.957 seconds
Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 1, 0, 12); AIC=3267.018, BIC=3273.458, Fit time=0.116 seconds
Fit ARIMA: order=(1, 1, 0) seasonal_order=(1, 1, 0, 12); AIC=3175.254, BIC=3188.135, Fit time=0.556 seconds
Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 1, 12); AIC=3090.142, BIC=3103.024, Fit time=1.187 seconds
Fit ARIMA: order=(0, 1, 1) seasonal_order=(1, 1, 1, 12); AIC=3090.344, BIC=3106.446, Fit time=1.832 seconds
Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 0, 12); AIC=3193.863, BIC=3203.524, Fit time=0.293 seconds
Fit ARIMA: order=(0, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=3089.725, BIC=3105.827, Fit time=3.295 seconds
Fit ARIMA: order=(1, 1, 1) seasonal_order=(0, 1, 2, 12); AIC=3091.357, BIC=3110.679, Fit time=3.965 seconds
Fit ARIMA: order=(0, 1, 0) seasonal_order=(0, 1, 2, 12); AIC=3151.689, BIC=3164.570, Fit time=2.322 seconds
Fit ARIMA: order=(0, 1, 2) s

In [122]:
pred_ARIMA = pd.merge(test.reset_index(), previous_df, on = ['date', 'SKU_Customer']).set_index('date')
mape_by_month(pred_ARIMA, pred_ARIMA['prediction'])

45.67192254187942

## Holt Winters:

In [142]:
period = 26
previous_df_HW = pd.DataFrame()
forecast_dates = pd.to_datetime(test.index.unique().sort_values().values, format = '%Y-%m-%d')

for i in SKU_Customer_list:
    model_HW = ExponentialSmoothing(np.asarray(train[train.SKU_Customer == i].Sales),seasonal_periods=52, trend='add', seasonal='add')
    model_HW = model_HW.fit(smoothing_level=0.1, smoothing_slope=0.001, smoothing_seasonal=0.05)
    pred_HW = model_HW.forecast(period) 

    last_df_HW = pd.DataFrame({'date': forecast_dates, 
                            'SKU_Customer': i,
                            'prediction': pred_HW})
    
    previous_df_HW = pd.concat([previous_df_HW, last_df_HW], ignore_index = True)  

In [None]:
pred_HW = pd.merge(test.reset_index(), previous_df_HW, on = ['date', 'SKU_Customer']).set_index('date')
mape_by_month(pred_HW, pred_HW['prediction'])

In [206]:
# testing

period = 26
previous_df_HW = pd.DataFrame()
forecast_dates = pd.to_datetime(test.index.unique().sort_values().values, format = '%Y-%m-%d')

mape_pred = 150
for alpha in np.arange(0, 1, .1):
    for beta in np.arange(0, 1, .1):
        for gamma in np.arange(0, 1, .1):           
            for i in SKU_Customer_list:
                model_HW = ExponentialSmoothing(np.asarray(train[train.SKU_Customer == i].Sales),seasonal_periods=52, trend='add', seasonal='add')
                model_HW = model_HW.fit(smoothing_level=alpha, smoothing_slope=beta, smoothing_seasonal=gamma)
                pred_HW = model_HW.forecast(period) 

                last_df_HW = pd.DataFrame({'date': forecast_dates, 
                                        'SKU_Customer': i,
                                        'prediction': pred_HW})
    
                previous_df_HW = pd.concat([previous_df_HW, last_df_HW], ignore_index = True)  
                pred_HW = pd.merge(test.reset_index(), previous_df_HW, on = ['date', 'SKU_Customer']).set_index('date')
                mape_potential_pred = mape_by_month(pred_HW, pred_HW['prediction'])
                if mape_potential_pred < mape_pred:
                    mape_pred = mape_potential_pred
                    param = [alpha, beta, gamma]
    

KeyboardInterrupt: 

In [204]:
param

[0.0, 0.0, 0.0]

In [None]:
for i in SKU_Customer_list:
    model_HW = ExponentialSmoothing(np.asarray(train[train.SKU_Customer == i].Sales),seasonal_periods=52, trend='add', seasonal='add')
    model_HW = model_HW.fit(smoothing_level=param[0], smoothing_slope=param[1], smoothing_seasonal=param[2])
    pred_HW = model_HW.forecast(period) 

    last_df_HW = pd.DataFrame({'date': forecast_dates, 
                            'SKU_Customer': i,
                            'prediction': pred_HW})
    
    previous_df_HW = pd.concat([previous_df_HW, last_df_HW], ignore_index = True) 

In [202]:
pred_HW = pd.merge(test.reset_index(), previous_df_HW, on = ['date', 'SKU_Customer']).set_index('date')
mape_by_month(pred_HW, pred_HW['prediction'])

29.030974020832474

## TBATS:

In [146]:
period = 26
previous_df_TBATS = pd.DataFrame()
forecast_dates = pd.to_datetime(test.index.unique().sort_values().values, format = '%Y-%m-%d')
estimator = TBATS(seasonal_periods = [52.18])

for i in SKU_Customer_list:
    model_TBATS = estimator.fit(train[train.SKU_Customer == i].Sales)
    pred_TBATS = model_TBATS.forecast(steps = period)

    last_df = pd.DataFrame({'date': forecast_dates, 
                            'SKU_Customer': i,
                            'prediction': pred_TBATS})
    
    previous_df_TBATS = pd.concat([previous_df_TBATS, last_df], ignore_index = True)  

In [147]:
pred_TBATS = pd.merge(test.reset_index(), previous_df_TBATS, on = ['date', 'SKU_Customer']).set_index('date')
mape_by_month(pred_TBATS, pred_TBATS['prediction'])

39.42492818923642

## Prophet:

In [148]:
from fbprophet import Prophet
import logging

logging.getLogger().setLevel(logging.ERROR)

In [149]:
period = 26

train_Prophet = train[['SKU_Customer', 'Sales']].reset_index()
test_Prophet = test[['SKU_Customer', 'Sales']].reset_index()
train_Prophet.columns = ['ds', 'SKU_Customer', 'y']
test_Prophet.columns = ['ds', 'SKU_Customer', 'y']

In [189]:
period = 26
previous_df_Prophet = pd.DataFrame()
forecast_dates = pd.to_datetime(test.index.unique().sort_values().values, format = '%Y-%m-%d')

for i in SKU_Customer_list:
    model_Prophet = Prophet()
    model_Prophet.fit(train_Prophet[train_Prophet.SKU_Customer == i].loc[:, ['ds', 'y']])
    future_Prophet = model_Prophet.make_future_dataframe(periods=period, freq = 'W-THU')
    pred_Prophet = model_Prophet.predict(future_Prophet)
    pred_Prophet = pred_Prophet.loc[-period:, 'yhat']
    
    last_df_Prophet = pd.DataFrame({'date': forecast_dates, 
                            'SKU_Customer': i,
                            'prediction': pred_Prophet})
    
    previous_df_Prophet = pd.concat([previous_df_Prophet, last_df_Prophet], ignore_index = True)  

In [190]:
pred_Prophet = pd.merge(test.reset_index(), previous_df_Prophet, on = ['date', 'SKU_Customer']).set_index('date')
mape_by_month(pred_TBATS, pred_Prophet['prediction'])

47.97456532684557