# Import necessary packages

In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

from random import sample

import seaborn as sns
import matplotlib.pyplot as plt

from statsmodels.tsa.arima.model import ARIMA

from sklearn.metrics import mean_absolute_error, mean_squared_error

from timeit import default_timer as timer

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import original data and preprocessed data

In [3]:
sales_train_val = pd.read_csv('/content/drive/MyDrive/Colab Notebooks_Daily Prediction_Step by Step/sales_train_validation.csv')

**Note: This Step is for the extraction of California State and Foods category**

In [4]:
sales_train_val = sales_train_val[(sales_train_val['state_id'] == 'CA') & (sales_train_val['cat_id'] == 'HOBBIES')]

In [5]:
INPUT_DIR_2 = '/content/drive/MyDrive/Colab Notebooks_Daily Prediction_Step by Step/List of Product ID according to 4 demand patterns/California/Hobbies/2-year'
list_intermittent = pd.read_csv(f'{INPUT_DIR_2}/Intermittent_ID_2_Year_Data.csv')
list_lumpy = pd.read_csv(f'{INPUT_DIR_2}/Lumpy_ID_2_Year_Data.csv')
list_erratic = pd.read_csv(f'{INPUT_DIR_2}/Erratic_ID_2_Year_Data.csv')
list_smooth = pd.read_csv(f'{INPUT_DIR_2}/Smooth_ID_2_Year_Data.csv')

list_intermittent = list_intermittent['0'].values.tolist()
list_lumpy = list_lumpy['0'].values.tolist()
list_erratic = list_erratic['0'].values.tolist()
list_smooth = list_smooth['0'].values.tolist()

sales_intermittent = sales_train_val[sales_train_val.id.isin(list_intermittent)]
sales_lumpy = sales_train_val[sales_train_val.id.isin(list_lumpy)]
sales_erratic = sales_train_val[sales_train_val.id.isin(list_erratic)]
sales_smooth = sales_train_val[sales_train_val.id.isin(list_smooth)]

# User-defined functions to calculate Metrics

In [6]:
ROUNDING_DECIMAL = 4

def mase_calculation(ts, prediction):
    divisor = 0
    for i in range(1, ts.shape[0]):
        divisor = divisor + abs(ts.iloc[i] - ts.iloc[i-1])
    divisor = divisor/(ts.shape[0] - 1)
    diff    = abs(ts - prediction[:ts.shape[0]])/divisor
    mase    = diff.mean()
    return mase

def mape_calculation(actual, pred): 
    if not all([isinstance(actual, np.ndarray), isinstance(pred, np.ndarray)]):
        actual, pred = np.array(actual), np.array(pred)
    mask = (actual != 0)
    return round((np.fabs(actual - pred)/actual)[mask].mean()*100, ROUNDING_DECIMAL)

def wmape_calculation(actual, pred):
    if not all([isinstance(actual, np.ndarray), isinstance(pred, np.ndarray)]):
        actual, pred = np.array(actual), np.array(pred)
    return round((np.sum(np.absolute(actual-pred))/np.sum(actual))*100, ROUNDING_DECIMAL)

def smape_calculation(actual, predicted):
    if not all([isinstance(actual, np.ndarray), isinstance(predicted, np.ndarray)]):
        actual, predicted = np.array(actual), np.array(predicted)
    return round(np.mean(np.abs(predicted - actual) / ((np.abs(predicted) + np.abs(actual))/2))*100, ROUNDING_DECIMAL)

# Parameters settings

In [7]:
start_train_date = '2014-04-11'
end_train_date = '2016-04-10'
n_pred_days = 14
num_train_needed = 100

In [8]:
validation_training_ratio = 0.99

In [9]:
list_params = list()
for p in [0, 1, 2, 3]:
    for d in range(2):
        for q in range(4):
            list_params.append((p, d, q))

In [10]:
list_params

[(0, 0, 0),
 (0, 0, 1),
 (0, 0, 2),
 (0, 0, 3),
 (0, 1, 0),
 (0, 1, 1),
 (0, 1, 2),
 (0, 1, 3),
 (1, 0, 0),
 (1, 0, 1),
 (1, 0, 2),
 (1, 0, 3),
 (1, 1, 0),
 (1, 1, 1),
 (1, 1, 2),
 (1, 1, 3),
 (2, 0, 0),
 (2, 0, 1),
 (2, 0, 2),
 (2, 0, 3),
 (2, 1, 0),
 (2, 1, 1),
 (2, 1, 2),
 (2, 1, 3),
 (3, 0, 0),
 (3, 0, 1),
 (3, 0, 2),
 (3, 0, 3),
 (3, 1, 0),
 (3, 1, 1),
 (3, 1, 2),
 (3, 1, 3)]

In [11]:
len(list_params)

32

# This step involves specifying Demand Pattern (Intermittent or Lumpy or Erratic or Smooth)

In [12]:
sales_pattern = sales_erratic.copy()

In [13]:
sales_pattern_py = sales_pattern.copy()
list_pattern_py = sales_pattern_py.id.unique().tolist()

sales_pattern_py = sales_pattern_py.drop(['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'], axis=1)
df_pattern = sales_pattern_py.melt(['id'], var_name='Date').pivot(index = ['Date'], columns = 'id', values = 'value').reset_index()
df_pattern.drop('Date', axis=1, inplace=True)
df_pattern.index = pd.date_range('2011-01-29', periods=df_pattern.shape[0], freq="D")
df_pattern.index.names = ['Date']
df_pattern = df_pattern.astype('float64')

train_data = df_pattern[(df_pattern.index >= start_train_date) & (df_pattern.index <= end_train_date)]
test_data = df_pattern.iloc[n_pred_days*-1:]

train_data_first_part = train_data.iloc[:int(len(train_data)*validation_training_ratio), :]
train_data_second_part = train_data.iloc[int(len(train_data)*validation_training_ratio):, :]

# Hyperparameter for ARIMA model

In [14]:
list_sample = sample(list_pattern_py, 50)

In [15]:
list_sample

['HOBBIES_1_341_CA_3_validation',
 'HOBBIES_1_103_CA_4_validation',
 'HOBBIES_1_256_CA_3_validation',
 'HOBBIES_1_256_CA_4_validation',
 'HOBBIES_1_261_CA_1_validation',
 'HOBBIES_1_103_CA_1_validation',
 'HOBBIES_1_074_CA_3_validation',
 'HOBBIES_1_189_CA_4_validation',
 'HOBBIES_1_234_CA_4_validation',
 'HOBBIES_1_016_CA_1_validation',
 'HOBBIES_1_369_CA_3_validation',
 'HOBBIES_1_254_CA_3_validation',
 'HOBBIES_1_048_CA_3_validation',
 'HOBBIES_1_370_CA_3_validation',
 'HOBBIES_1_256_CA_1_validation',
 'HOBBIES_1_341_CA_1_validation',
 'HOBBIES_1_015_CA_4_validation',
 'HOBBIES_1_254_CA_1_validation',
 'HOBBIES_1_014_CA_3_validation',
 'HOBBIES_1_268_CA_4_validation',
 'HOBBIES_1_244_CA_1_validation',
 'HOBBIES_1_381_CA_4_validation',
 'HOBBIES_1_404_CA_2_validation',
 'HOBBIES_1_341_CA_4_validation',
 'HOBBIES_1_404_CA_4_validation',
 'HOBBIES_1_371_CA_1_validation',
 'HOBBIES_1_019_CA_4_validation',
 'HOBBIES_1_370_CA_4_validation',
 'HOBBIES_1_008_CA_1_validation',
 'HOBBIES_1_34

In [None]:
start = timer()

df_result_all_params = pd.DataFrame()
for product in list_sample:
    train_first = train_data_first_part[product].iloc[-num_train_needed:]
    train_second = train_data_second_part[product]
    
    for combi in list_params:
        model = ARIMA(train_first, order = combi)
        model_fit = model.fit()
        forecast_temp = model_fit.forecast(len(train_second))
        df_temp = pd.DataFrame({'Date': train_second.index,
                                'Actual Data': train_second.values,
                                'Forecast': forecast_temp.values,
                                'Combination': [combi for combi_count in range(len(train_second))], 
                                'Product': [product for product_count in range(len(train_second))]})
        df_result_all_params = df_result_all_params.append(df_temp, ignore_index=False)
        
end = timer()
print('This line of code took {} minutes'.format((end-start) / 60))



This line of code took 5.94260322795 minutes




In [None]:
df_result_all_params

Unnamed: 0,Date,Actual Data,Forecast,Combination,Product
0,2016-04-03,0.0,3.539995,"(0, 0, 0)",FOODS_3_638_CA_2_validation
1,2016-04-04,4.0,3.539995,"(0, 0, 0)",FOODS_3_638_CA_2_validation
2,2016-04-05,7.0,3.539995,"(0, 0, 0)",FOODS_3_638_CA_2_validation
3,2016-04-06,7.0,3.539995,"(0, 0, 0)",FOODS_3_638_CA_2_validation
4,2016-04-07,5.0,3.539995,"(0, 0, 0)",FOODS_3_638_CA_2_validation
...,...,...,...,...,...
3,2016-04-06,12.0,7.571533,"(3, 1, 3)",FOODS_3_376_CA_1_validation
4,2016-04-07,12.0,7.263188,"(3, 1, 3)",FOODS_3_376_CA_1_validation
5,2016-04-08,10.0,6.790784,"(3, 1, 3)",FOODS_3_376_CA_1_validation
6,2016-04-09,8.0,6.494780,"(3, 1, 3)",FOODS_3_376_CA_1_validation


# Summay all metrics

In [16]:
def get_metrics_result_all_params(data):
    metrics_df = {}
    metrics_df['MASE'] = mase_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['WMAPE'] = wmape_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['SMAPE'] = smape_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['MAPE'] = mape_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['MAE'] = mean_absolute_error(data['Actual Data'], data['Forecast'])
    metrics_df['RMSE'] = np.sqrt(mean_squared_error(data['Actual Data'], data['Forecast']))
    return pd.Series(metrics_df)

In [None]:
start = timer()

df_result_metrics_all_params = df_result_all_params.groupby(['Product', 'Combination']).apply(get_metrics_result_all_params).reset_index()

end = timer()
print('This line of code took {} minutes'.format((end-start) / 60))

This line of code took 0.07930070849999993 minutes


In [None]:
df_result_metrics_all_params

Unnamed: 0,Product,Combination,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
0,FOODS_1_004_CA_2_validation,"(0, 0, 0)",0.919148,71.1077,79.0533,87.9946,5.777499,6.789300
1,FOODS_1_004_CA_2_validation,"(0, 0, 1)",0.849026,65.6829,78.9891,87.2203,5.336734,6.342160
2,FOODS_1_004_CA_2_validation,"(0, 0, 2)",0.931665,72.0761,83.8661,90.5153,5.856180,6.664040
3,FOODS_1_004_CA_2_validation,"(0, 0, 3)",0.931370,72.0532,83.1228,89.7419,5.854326,6.803376
4,FOODS_1_004_CA_2_validation,"(0, 1, 0)",0.934659,72.3077,89.2519,46.3520,5.875000,8.492644
...,...,...,...,...,...,...,...,...
1595,FOODS_3_789_CA_1_validation,"(3, 0, 3)",0.808112,71.0428,89.2811,50.9494,4.617782,5.612926
1596,FOODS_3_789_CA_1_validation,"(3, 1, 0)",1.425413,125.3110,92.2644,241.8435,8.145216,9.415172
1597,FOODS_3_789_CA_1_validation,"(3, 1, 1)",0.791612,69.5923,78.5207,62.5868,4.523497,5.436948
1598,FOODS_3_789_CA_1_validation,"(3, 1, 2)",0.795639,69.9463,79.5309,62.3035,4.546509,5.343863


# Check what products has unexpected metrics result

In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['MASE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Combination,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
160,FOODS_1_163_CA_1_validation,"(0, 0, 0)",inf,inf,200.0,,0.000005,0.000005
161,FOODS_1_163_CA_1_validation,"(0, 0, 1)",inf,inf,200.0,,0.000005,0.000005
162,FOODS_1_163_CA_1_validation,"(0, 0, 2)",inf,inf,200.0,,0.000005,0.000005
163,FOODS_1_163_CA_1_validation,"(0, 0, 3)",inf,inf,200.0,,0.000005,0.000005
164,FOODS_1_163_CA_1_validation,"(0, 1, 0)",,,,,0.000000,0.000000
...,...,...,...,...,...,...,...,...
1019,FOODS_3_433_CA_3_validation,"(3, 0, 3)",inf,inf,200.0,,1.315690,1.319772
1020,FOODS_3_433_CA_3_validation,"(3, 1, 0)",inf,inf,200.0,,2.139663,2.140423
1021,FOODS_3_433_CA_3_validation,"(3, 1, 1)",inf,inf,200.0,,1.685651,1.685862
1022,FOODS_3_433_CA_3_validation,"(3, 1, 2)",inf,inf,200.0,,1.676526,1.677171


In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['WMAPE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Combination,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
160,FOODS_1_163_CA_1_validation,"(0, 0, 0)",inf,inf,200.0,,0.000005,0.000005
161,FOODS_1_163_CA_1_validation,"(0, 0, 1)",inf,inf,200.0,,0.000005,0.000005
162,FOODS_1_163_CA_1_validation,"(0, 0, 2)",inf,inf,200.0,,0.000005,0.000005
163,FOODS_1_163_CA_1_validation,"(0, 0, 3)",inf,inf,200.0,,0.000005,0.000005
164,FOODS_1_163_CA_1_validation,"(0, 1, 0)",,,,,0.000000,0.000000
...,...,...,...,...,...,...,...,...
1019,FOODS_3_433_CA_3_validation,"(3, 0, 3)",inf,inf,200.0,,1.315690,1.319772
1020,FOODS_3_433_CA_3_validation,"(3, 1, 0)",inf,inf,200.0,,2.139663,2.140423
1021,FOODS_3_433_CA_3_validation,"(3, 1, 1)",inf,inf,200.0,,1.685651,1.685862
1022,FOODS_3_433_CA_3_validation,"(3, 1, 2)",inf,inf,200.0,,1.676526,1.677171


In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['SMAPE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Combination,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
164,FOODS_1_163_CA_1_validation,"(0, 1, 0)",,,,,0.0,0.0
165,FOODS_1_163_CA_1_validation,"(0, 1, 1)",,,,,0.0,0.0
166,FOODS_1_163_CA_1_validation,"(0, 1, 2)",,,,,0.0,0.0
167,FOODS_1_163_CA_1_validation,"(0, 1, 3)",,,,,0.0,0.0
172,FOODS_1_163_CA_1_validation,"(1, 1, 0)",,,,,0.0,0.0
173,FOODS_1_163_CA_1_validation,"(1, 1, 1)",,,,,0.0,0.0
174,FOODS_1_163_CA_1_validation,"(1, 1, 2)",,,,,0.0,0.0
175,FOODS_1_163_CA_1_validation,"(1, 1, 3)",,,,,0.0,0.0
180,FOODS_1_163_CA_1_validation,"(2, 1, 0)",,,,,0.0,0.0
181,FOODS_1_163_CA_1_validation,"(2, 1, 1)",,,,,0.0,0.0


In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['MAPE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Combination,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
160,FOODS_1_163_CA_1_validation,"(0, 0, 0)",inf,inf,200.0,,0.000005,0.000005
161,FOODS_1_163_CA_1_validation,"(0, 0, 1)",inf,inf,200.0,,0.000005,0.000005
162,FOODS_1_163_CA_1_validation,"(0, 0, 2)",inf,inf,200.0,,0.000005,0.000005
163,FOODS_1_163_CA_1_validation,"(0, 0, 3)",inf,inf,200.0,,0.000005,0.000005
164,FOODS_1_163_CA_1_validation,"(0, 1, 0)",,,,,0.000000,0.000000
...,...,...,...,...,...,...,...,...
1019,FOODS_3_433_CA_3_validation,"(3, 0, 3)",inf,inf,200.0,,1.315690,1.319772
1020,FOODS_3_433_CA_3_validation,"(3, 1, 0)",inf,inf,200.0,,2.139663,2.140423
1021,FOODS_3_433_CA_3_validation,"(3, 1, 1)",inf,inf,200.0,,1.685651,1.685862
1022,FOODS_3_433_CA_3_validation,"(3, 1, 2)",inf,inf,200.0,,1.676526,1.677171


In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['RMSE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Combination,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE


In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['MAE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Combination,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE


# Metrics Statistics

In [None]:
list_metrics = ['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE']

In [None]:
for mts in list_metrics:
    print('Percentage of unexpected values of', mts, 'is: {}'.format(df_result_metrics_all_params[df_result_metrics_all_params[mts].isin([np.nan, np.inf, -np.inf])].Product.nunique() / df_result_metrics_all_params.Product.nunique() * 100), "%")

Percentage of unexpected values of MASE is: 6.0 %
Percentage of unexpected values of WMAPE is: 6.0 %
Percentage of unexpected values of SMAPE is: 16.0 %
Percentage of unexpected values of MAPE is: 6.0 %
Percentage of unexpected values of MAE is: 0.0 %
Percentage of unexpected values of RMSE is: 0.0 %


### Filter all rows that have unexpected metrics values

In [None]:
df_result_metrics_all_params = df_result_metrics_all_params[~df_result_metrics_all_params.isin([np.nan, np.inf, -np.inf]).any(1)]

### Get MEAN metrics value of each Combination

In [None]:
df_result_metrics_all_params.groupby(['Combination'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].mean()

Unnamed: 0_level_0,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
Combination,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"(0, 0, 0)",1.247346,110.412985,81.595085,111.95176,4.34234,5.155304
"(0, 0, 1)",1.244722,109.517372,81.616909,112.648094,4.4197,5.253979
"(0, 0, 2)",1.235548,108.381232,81.664145,113.305151,4.435513,5.258394
"(0, 0, 3)",1.216645,106.642783,81.068389,112.93397,4.354884,5.174192
"(0, 1, 0)",1.205108,83.599398,89.820176,101.408283,5.85061,7.017085
"(0, 1, 1)",0.996258,82.064726,80.473015,105.852955,4.45806,5.17894
"(0, 1, 2)",0.988704,81.426064,80.52667,99.26786,4.137298,4.889839
"(0, 1, 3)",1.012252,82.836749,82.948483,102.025057,4.236737,4.991982
"(1, 0, 0)",1.185,103.428298,81.212834,110.829577,4.335857,5.172404
"(1, 0, 1)",1.095956,93.52526,80.406998,105.23297,4.302372,5.121762


In [None]:
for mts in list_metrics:
    print('The optimum Combination based on Mean', mts, 'is: {}'.format(df_result_metrics_all_params.groupby(['Combination'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].mean()[mts].idxmin()))

The optimum Combination based on Mean MASE is: (0, 1, 2)
The optimum Combination based on Mean WMAPE is: (1, 1, 2)
The optimum Combination based on Mean SMAPE is: (3, 1, 1)
The optimum Combination based on Mean MAPE is: (1, 1, 0)
The optimum Combination based on Mean MAE is: (1, 1, 1)
The optimum Combination based on Mean RMSE is: (0, 1, 2)


### Get MEDIAN metrics value of each Combination

In [None]:
df_result_metrics_all_params.groupby(['Combination'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].median()

Unnamed: 0_level_0,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
Combination,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"(0, 0, 0)",0.910001,65.2174,75.9883,64.4687,2.767496,3.197437
"(0, 0, 1)",0.908289,65.6989,75.9874,64.8027,2.864486,3.283424
"(0, 0, 2)",0.909938,65.5237,76.1131,62.5406,2.845567,3.143225
"(0, 0, 3)",0.910081,66.7414,75.5534,59.2817,2.79517,3.103397
"(0, 1, 0)",0.875,72.3077,86.3553,57.7778,2.875,3.691206
"(0, 1, 1)",0.7875,63.5151,73.4912,56.7585,2.375,2.888769
"(0, 1, 2)",0.76816,63.0525,72.693,56.5517,2.321939,2.940888
"(0, 1, 3)",0.770683,64.0116,72.9814,56.4599,2.30499,2.870881
"(1, 0, 0)",0.903305,66.1191,75.9588,63.5329,2.724995,3.163956
"(1, 0, 1)",0.889902,63.8022,75.9302,58.3163,2.728443,3.160045


In [None]:
for mts in list_metrics:
    print('The optimum Combination based on Median of', mts, 'is: {}'.format(df_result_metrics_all_params.groupby(['Combination'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].median()[mts].idxmin()))

The optimum Combination based on Median of MASE is: (0, 1, 2)
The optimum Combination based on Median of WMAPE is: (2, 1, 3)
The optimum Combination based on Median of SMAPE is: (3, 1, 3)
The optimum Combination based on Median of MAPE is: (0, 1, 3)
The optimum Combination based on Median of MAE is: (0, 1, 3)
The optimum Combination based on Median of RMSE is: (3, 1, 1)


# Run ARIMA model after decide best paramaters

In [17]:
best_combination = (0, 1, 2)

In [18]:
list_pattern_py

['HOBBIES_1_004_CA_1_validation',
 'HOBBIES_1_008_CA_1_validation',
 'HOBBIES_1_015_CA_1_validation',
 'HOBBIES_1_016_CA_1_validation',
 'HOBBIES_1_030_CA_1_validation',
 'HOBBIES_1_048_CA_1_validation',
 'HOBBIES_1_103_CA_1_validation',
 'HOBBIES_1_178_CA_1_validation',
 'HOBBIES_1_189_CA_1_validation',
 'HOBBIES_1_244_CA_1_validation',
 'HOBBIES_1_254_CA_1_validation',
 'HOBBIES_1_256_CA_1_validation',
 'HOBBIES_1_261_CA_1_validation',
 'HOBBIES_1_268_CA_1_validation',
 'HOBBIES_1_295_CA_1_validation',
 'HOBBIES_1_341_CA_1_validation',
 'HOBBIES_1_348_CA_1_validation',
 'HOBBIES_1_371_CA_1_validation',
 'HOBBIES_1_404_CA_1_validation',
 'HOBBIES_1_134_CA_2_validation',
 'HOBBIES_1_254_CA_2_validation',
 'HOBBIES_1_404_CA_2_validation',
 'HOBBIES_1_008_CA_3_validation',
 'HOBBIES_1_014_CA_3_validation',
 'HOBBIES_1_015_CA_3_validation',
 'HOBBIES_1_016_CA_3_validation',
 'HOBBIES_1_019_CA_3_validation',
 'HOBBIES_1_043_CA_3_validation',
 'HOBBIES_1_048_CA_3_validation',
 'HOBBIES_1_07

In [19]:
start = timer()

df_result_best_params = pd.DataFrame()
for product in list_pattern_py:
    print('Currently Running Product: %s' % product)
    print('Progressing: {0} %'.format(round(list_pattern_py.index(product) / len(list_pattern_py) * 100, 2)))
    train_product = train_data[product].iloc[num_train_needed*-1:]
    test_product = test_data[product]  
    
    model = ARIMA(train_product, order = best_combination)
    model_fit = model.fit()

    forecast_temp = model_fit.forecast(len(test_product))
    df_result_temp = pd.DataFrame({'Date': test_product.index,
                                   'Actual Data': test_product.values,
                                   'Forecast': forecast_temp.values,
                                   'Product': [product for product_count in range(len(test_product))]})
    
    df_result_best_params = df_result_best_params.append(df_result_temp, ignore_index=True) 
    
end = timer()
print('This line of code took {} minutes'.format((end-start) / 60))

Currently Running Product: HOBBIES_1_004_CA_1_validation
Progressing: 0.0 %
Currently Running Product: HOBBIES_1_008_CA_1_validation
Progressing: 1.49 %
Currently Running Product: HOBBIES_1_015_CA_1_validation
Progressing: 2.99 %
Currently Running Product: HOBBIES_1_016_CA_1_validation
Progressing: 4.48 %
Currently Running Product: HOBBIES_1_030_CA_1_validation
Progressing: 5.97 %
Currently Running Product: HOBBIES_1_048_CA_1_validation
Progressing: 7.46 %
Currently Running Product: HOBBIES_1_103_CA_1_validation
Progressing: 8.96 %
Currently Running Product: HOBBIES_1_178_CA_1_validation
Progressing: 10.45 %
Currently Running Product: HOBBIES_1_189_CA_1_validation
Progressing: 11.94 %
Currently Running Product: HOBBIES_1_244_CA_1_validation
Progressing: 13.43 %
Currently Running Product: HOBBIES_1_254_CA_1_validation
Progressing: 14.93 %
Currently Running Product: HOBBIES_1_256_CA_1_validation
Progressing: 16.42 %
Currently Running Product: HOBBIES_1_261_CA_1_validation
Progressing: 17



Currently Running Product: HOBBIES_1_234_CA_4_validation
Progressing: 80.6 %
Currently Running Product: HOBBIES_1_244_CA_4_validation
Progressing: 82.09 %




Currently Running Product: HOBBIES_1_254_CA_4_validation
Progressing: 83.58 %
Currently Running Product: HOBBIES_1_256_CA_4_validation
Progressing: 85.07 %
Currently Running Product: HOBBIES_1_261_CA_4_validation
Progressing: 86.57 %
Currently Running Product: HOBBIES_1_268_CA_4_validation
Progressing: 88.06 %




Currently Running Product: HOBBIES_1_295_CA_4_validation
Progressing: 89.55 %
Currently Running Product: HOBBIES_1_341_CA_4_validation
Progressing: 91.04 %
Currently Running Product: HOBBIES_1_348_CA_4_validation
Progressing: 92.54 %
Currently Running Product: HOBBIES_1_370_CA_4_validation
Progressing: 94.03 %
Currently Running Product: HOBBIES_1_371_CA_4_validation
Progressing: 95.52 %
Currently Running Product: HOBBIES_1_381_CA_4_validation
Progressing: 97.01 %
Currently Running Product: HOBBIES_1_404_CA_4_validation
Progressing: 98.51 %
This line of code took 0.1864579867333333 minutes


In [20]:
df_result_best_params

Unnamed: 0,Date,Actual Data,Forecast,Product
0,2016-04-11,0.0,1.473985,HOBBIES_1_004_CA_1_validation
1,2016-04-12,3.0,1.517675,HOBBIES_1_004_CA_1_validation
2,2016-04-13,5.0,1.517675,HOBBIES_1_004_CA_1_validation
3,2016-04-14,1.0,1.517675,HOBBIES_1_004_CA_1_validation
4,2016-04-15,1.0,1.517675,HOBBIES_1_004_CA_1_validation
...,...,...,...,...
933,2016-04-20,6.0,7.202303,HOBBIES_1_404_CA_4_validation
934,2016-04-21,1.0,7.202303,HOBBIES_1_404_CA_4_validation
935,2016-04-22,3.0,7.202303,HOBBIES_1_404_CA_4_validation
936,2016-04-23,30.0,7.202303,HOBBIES_1_404_CA_4_validation


In [21]:
#df_result_best_params.to_csv('ARIMA_Erratic_Point_Forecast.csv')

In [22]:
df_result_final = df_result_best_params.groupby('Product').apply(get_metrics_result_all_params).reset_index()

In [23]:
df_result_final

Unnamed: 0,Product,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
0,HOBBIES_1_004_CA_1_validation,0.769732,53.2891,60.2421,45.0638,1.065783,1.391972
1,HOBBIES_1_008_CA_1_validation,0.586623,88.1391,103.8011,250.9167,7.806603,9.493343
2,HOBBIES_1_008_CA_3_validation,0.842380,102.2989,98.4754,178.2527,6.868638,8.415311
3,HOBBIES_1_014_CA_3_validation,0.688517,101.1110,102.9079,91.7676,2.383330,2.899543
4,HOBBIES_1_015_CA_1_validation,0.708320,57.5844,61.4503,97.5579,4.195432,5.170566
...,...,...,...,...,...,...,...
62,HOBBIES_1_381_CA_4_validation,0.734167,117.7178,129.6259,78.0385,3.783786,4.267159
63,HOBBIES_1_387_CA_3_validation,0.564868,66.8229,76.8002,77.4525,6.300448,8.927413
64,HOBBIES_1_404_CA_1_validation,0.598593,74.2084,76.6370,123.6336,4.558514,6.505152
65,HOBBIES_1_404_CA_2_validation,0.689231,82.2928,93.1683,99.6793,5.407810,8.450449


In [24]:
df_result_final.RMSE.mean()

6.7241527863102455

In [25]:
df_result_final.MAPE.mean()

104.85669999999999

In [26]:
df_result_final.to_csv('ARIMA_Erratic_Metrics.csv')