# Import necessary packages

In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_error, mean_squared_error

from timeit import default_timer as timer

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import original data and preprocessed data

In [3]:
sales_train_val = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/sales_train_validation.csv')

### Note: This Step is for the extraction of California State and Foods category 

In [4]:
sales_train_val = sales_train_val[(sales_train_val['state_id'] == 'CA') & (sales_train_val['cat_id'] == 'HOBBIES')]

In [5]:
INPUT_DIR_2 = '/content/drive/MyDrive/Colab Notebooks/List of Product ID according to 4 demand patterns/California/Hobbies/2-year'
list_intermittent = pd.read_csv(f'{INPUT_DIR_2}/Intermittent_ID_2_Year_Data.csv')
list_lumpy = pd.read_csv(f'{INPUT_DIR_2}/Lumpy_ID_2_Year_Data.csv')
list_erratic = pd.read_csv(f'{INPUT_DIR_2}/Erratic_ID_2_Year_Data.csv')
list_smooth = pd.read_csv(f'{INPUT_DIR_2}/Smooth_ID_2_Year_Data.csv')

list_intermittent = list_intermittent['0'].values.tolist()
list_lumpy = list_lumpy['0'].values.tolist()
list_erratic = list_erratic['0'].values.tolist()
list_smooth = list_smooth['0'].values.tolist()

sales_intermittent = sales_train_val[sales_train_val.id.isin(list_intermittent)]
sales_lumpy = sales_train_val[sales_train_val.id.isin(list_lumpy)]
sales_erratic = sales_train_val[sales_train_val.id.isin(list_erratic)]
sales_smooth = sales_train_val[sales_train_val.id.isin(list_smooth)]

# User-defined functions to calculate Metrics and Croston_TSB algorithm

In [6]:
ROUNDING_DECIMAL = 4

def mase_calculation(ts, prediction):
    divisor = 0
    for i in range(1, ts.shape[0]):
        divisor = divisor + abs(ts.iloc[i] - ts.iloc[i-1])
    divisor = divisor/(ts.shape[0] - 1)
    diff    = abs(ts - prediction[:ts.shape[0]])/divisor
    mase    = diff.mean()
    return mase

def mape_calculation(actual, pred): 
    if not all([isinstance(actual, np.ndarray), isinstance(pred, np.ndarray)]):
        actual, pred = np.array(actual), np.array(pred)
    mask = (actual != 0)
    return round((np.fabs(actual - pred)/actual)[mask].mean()*100, ROUNDING_DECIMAL)

def wmape_calculation(actual, pred):
    if not all([isinstance(actual, np.ndarray), isinstance(pred, np.ndarray)]):
        actual, pred = np.array(actual), np.array(pred)
    return round((np.sum(np.absolute(actual-pred))/np.sum(actual))*100, ROUNDING_DECIMAL)

def smape_calculation(actual, predicted):
    if not all([isinstance(actual, np.ndarray), isinstance(predicted, np.ndarray)]):
        actual, predicted = np.array(actual), np.array(predicted)
    return round(np.mean(np.abs(predicted - actual) / ((np.abs(predicted) + np.abs(actual))/2))*100, ROUNDING_DECIMAL)

In [7]:
def Croston_TSB(ts,extra_periods=1,alpha=0.4,beta=0.4):
    d = np.array(ts) # Transform the input into a numpy array
    cols = len(d) # Historical period length
    d = np.append(d,[np.nan]*extra_periods) # Append np.nan into the demand array to cover future periods
    
    #level (a), probability(p) and forecast (f)
    a,p,f = np.full((3,cols+extra_periods),np.nan)
# Initialization
    first_occurence = np.argmax(d[:cols]>0)
    a[0] = d[first_occurence]
    p[0] = 1/(1 + first_occurence)
    f[0] = p[0]*a[0]
                 
    # Create all the t+1 forecasts
    for t in range(0,cols): 
        if d[t] > 0:
            a[t+1] = alpha*d[t] + (1-alpha)*a[t] 
            p[t+1] = beta*(1) + (1-beta)*p[t]  
        else:
            a[t+1] = a[t]
            p[t+1] = (1-beta)*p[t]       
        f[t+1] = p[t+1]*a[t+1]
        
    # Future Forecast
    a[cols+1:cols+extra_periods] = a[cols]
    p[cols+1:cols+extra_periods] = p[cols]
    f[cols+1:cols+extra_periods] = f[cols]
                      
    df = pd.DataFrame.from_dict({"Demand":d,"Forecast":f,"Period":p,"Level":a,"Error":d-f})
    return df

# Set value for parameters

In [None]:
list_params_alpha = [round(item, 1) for item in list(np.arange(0.1, 1, 0.1))]
list_params_beta = [round(item, 1) for item in list(np.arange(0.1, 1, 0.1))]

In [8]:
start_train_date = '2014-04-11'
end_train_date = '2016-04-10'
n_pred_days = 14
num_train_needed = 100
validation_training_ratio = 0.95

# Pipeline for Croston_TSB

In [None]:
def CrostonTSB_output_all_params(sales_pattern, list_params_alpha, list_params_beta, 
                                 start_train_date, end_train_date, n_pred_days, 
                                 num_train_needed, validation_training_ratio):
    
    sales_pattern_py = sales_pattern.copy()
    list_pattern_py = sales_pattern_py.id.unique().tolist()
    
    sales_pattern_py = sales_pattern_py.drop(['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'], axis=1)
    df_pattern = sales_pattern_py.melt(['id'], var_name='Date').pivot(index = ['Date'], columns = 'id', values = 'value').reset_index()
    df_pattern.drop('Date', axis=1, inplace=True)
    df_pattern.index = pd.date_range('2011-01-29', periods=df_pattern.shape[0], freq="D")
    df_pattern.index.names = ['Date']
    df_pattern = df_pattern.astype('float64')
    
    train_data = df_pattern[(df_pattern.index >= start_train_date) & (df_pattern.index <= end_train_date)]
    test_data = df_pattern.iloc[n_pred_days*-1:]
    train_data_first_part = train_data.iloc[:int(len(train_data)*validation_training_ratio), :]
    train_data_second_part = train_data.iloc[int(len(train_data)*validation_training_ratio):, :]
    
    df_result_all_params = pd.DataFrame()
    for product in list_pattern_py:
        train_first = train_data_first_part[product]
        train_second = train_data_second_part[product]
        for i in list_params_alpha:
            for j in list_params_beta:
                predictions = list()
                history = [x for x in train_first[num_train_needed*-1:]]
                for t in range(len(train_second)):
                    yhat = Croston_TSB(history, extra_periods=1, alpha = i, beta = j)['Forecast'].iloc[-1]
                    predictions.append(yhat)
                    history.append(train_second[t])
                df_result_temp = pd.DataFrame({'Product': [product for count in range(len(train_second))],
                                               'Actual Data': train_second,
                                               'Forecast': predictions,
                                               'Alpha': [i for count_i in range(len(train_second))],
                                               'Beta': [j for count_j in range(len(train_second))]})
                df_result_all_params = df_result_all_params.append(df_result_temp, ignore_index=True)
            
    return df_result_all_params

In [None]:
start = timer()

pattern_df_result_all_params = CrostonTSB_output_all_params(sales_lumpy,
                                                          list_params_alpha,
                                                          list_params_beta,
                                                          start_train_date,
                                                          end_train_date, 
                                                          n_pred_days,
                                                          num_train_needed,
                                                          validation_training_ratio)

end = timer()

print('This line of code took {} minutes'.format((end-start) / 60))

This line of code took 18.05348874781667 minutes


In [None]:
pattern_df_result_all_params

Unnamed: 0,Product,Actual Data,Forecast,Alpha,Beta
0,HOBBIES_1_006_CA_1_validation,0.0,5.403492e-01,0.1,0.1
1,HOBBIES_1_006_CA_1_validation,0.0,4.863142e-01,0.1,0.1
2,HOBBIES_1_006_CA_1_validation,0.0,4.376828e-01,0.1,0.1
3,HOBBIES_1_006_CA_1_validation,3.0,3.939145e-01,0.1,0.1
4,HOBBIES_1_006_CA_1_validation,2.0,5.725252e-01,0.1,0.1
...,...,...,...,...,...
1033960,HOBBIES_2_143_CA_4_validation,0.0,3.411000e-36,0.9,0.9
1033961,HOBBIES_2_143_CA_4_validation,0.0,3.411000e-37,0.9,0.9
1033962,HOBBIES_2_143_CA_4_validation,0.0,3.411000e-38,0.9,0.9
1033963,HOBBIES_2_143_CA_4_validation,0.0,3.411000e-39,0.9,0.9


In [None]:
pattern_df_result_all_params.to_csv('CrostonTSB_Lumpy_All_Parameters_Hyperparameter_Tuning_Data.csv')

# Summay all metrics

In [9]:
def get_metrics_result_all_params(data):
    metrics_df = {}
    metrics_df['MASE'] = mase_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['WMAPE'] = wmape_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['SMAPE'] = smape_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['MAPE'] = mape_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['MAE'] = mean_absolute_error(data['Actual Data'], data['Forecast'])
    metrics_df['RMSE'] = np.sqrt(mean_squared_error(data['Actual Data'], data['Forecast']))
    return pd.Series(metrics_df)

In [None]:
start = timer()

df_result_metrics_all_params = pattern_df_result_all_params.groupby(['Product', 'Alpha', 'Beta']).apply(get_metrics_result_all_params).reset_index()

end = timer()
print('This line of code took {} minutes'.format((end-start) / 60))

This line of code took 1.0362241173666689 minutes


In [None]:
df_result_metrics_all_params

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
0,HOBBIES_1_006_CA_1_validation,0.1,0.1,0.762753,121.9463,164.9362,62.2898,1.186504,1.765119
1,HOBBIES_1_006_CA_1_validation,0.1,0.2,0.774426,123.8125,164.6987,61.5133,1.204662,1.796921
2,HOBBIES_1_006_CA_1_validation,0.1,0.3,0.785600,125.5991,164.7807,61.9397,1.222045,1.834157
3,HOBBIES_1_006_CA_1_validation,0.1,0.4,0.789321,126.1939,164.5807,60.8207,1.227833,1.871239
4,HOBBIES_1_006_CA_1_validation,0.1,0.5,0.789079,126.1551,164.3283,59.0577,1.227455,1.907868
...,...,...,...,...,...,...,...,...,...
27940,HOBBIES_2_149_CA_3_validation,0.9,0.5,0.842893,173.2614,197.8361,91.6583,0.140482,0.313698
27941,HOBBIES_2_149_CA_3_validation,0.9,0.6,0.866734,178.1619,197.9059,91.9920,0.144456,0.326206
27942,HOBBIES_2_149_CA_3_validation,0.9,0.7,0.890674,183.0830,198.1222,92.9930,0.148446,0.340854
27943,HOBBIES_2_149_CA_3_validation,0.9,0.8,0.916077,188.3047,198.5076,94.6613,0.152679,0.358176


# Check what products has unexpected metrics result

In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['MASE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
2187,FOODS_1_017_CA_1_validation,0.1,0.1,inf,inf,200.0,,1.913212e-02,2.724544e-02
2188,FOODS_1_017_CA_1_validation,0.1,0.2,inf,inf,200.0,,1.233952e-03,2.502595e-03
2189,FOODS_1_017_CA_1_validation,0.1,0.3,inf,inf,200.0,,7.122093e-05,1.819891e-04
2190,FOODS_1_017_CA_1_validation,0.1,0.4,inf,inf,200.0,,2.866802e-06,8.719039e-06
2191,FOODS_1_017_CA_1_validation,0.1,0.5,inf,inf,200.0,,6.648501e-08,2.334877e-07
...,...,...,...,...,...,...,...,...,...
103351,FOODS_3_827_CA_4_validation,0.9,0.5,,,,,0.000000e+00,0.000000e+00
103352,FOODS_3_827_CA_4_validation,0.9,0.6,,,,,0.000000e+00,0.000000e+00
103353,FOODS_3_827_CA_4_validation,0.9,0.7,,,,,0.000000e+00,0.000000e+00
103354,FOODS_3_827_CA_4_validation,0.9,0.8,,,,,0.000000e+00,0.000000e+00


In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['WMAPE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
2187,FOODS_1_017_CA_1_validation,0.1,0.1,inf,inf,200.0,,1.913212e-02,2.724544e-02
2188,FOODS_1_017_CA_1_validation,0.1,0.2,inf,inf,200.0,,1.233952e-03,2.502595e-03
2189,FOODS_1_017_CA_1_validation,0.1,0.3,inf,inf,200.0,,7.122093e-05,1.819891e-04
2190,FOODS_1_017_CA_1_validation,0.1,0.4,inf,inf,200.0,,2.866802e-06,8.719039e-06
2191,FOODS_1_017_CA_1_validation,0.1,0.5,inf,inf,200.0,,6.648501e-08,2.334877e-07
...,...,...,...,...,...,...,...,...,...
103351,FOODS_3_827_CA_4_validation,0.9,0.5,,,,,0.000000e+00,0.000000e+00
103352,FOODS_3_827_CA_4_validation,0.9,0.6,,,,,0.000000e+00,0.000000e+00
103353,FOODS_3_827_CA_4_validation,0.9,0.7,,,,,0.000000e+00,0.000000e+00
103354,FOODS_3_827_CA_4_validation,0.9,0.8,,,,,0.000000e+00,0.000000e+00


In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['SMAPE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
4536,FOODS_1_039_CA_2_validation,0.1,0.1,,,,,0.0,0.0
4537,FOODS_1_039_CA_2_validation,0.1,0.2,,,,,0.0,0.0
4538,FOODS_1_039_CA_2_validation,0.1,0.3,,,,,0.0,0.0
4539,FOODS_1_039_CA_2_validation,0.1,0.4,,,,,0.0,0.0
4540,FOODS_1_039_CA_2_validation,0.1,0.5,,,,,0.0,0.0
...,...,...,...,...,...,...,...,...,...
103351,FOODS_3_827_CA_4_validation,0.9,0.5,,,,,0.0,0.0
103352,FOODS_3_827_CA_4_validation,0.9,0.6,,,,,0.0,0.0
103353,FOODS_3_827_CA_4_validation,0.9,0.7,,,,,0.0,0.0
103354,FOODS_3_827_CA_4_validation,0.9,0.8,,,,,0.0,0.0


In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['MAPE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
2187,FOODS_1_017_CA_1_validation,0.1,0.1,inf,inf,200.0,,1.913212e-02,2.724544e-02
2188,FOODS_1_017_CA_1_validation,0.1,0.2,inf,inf,200.0,,1.233952e-03,2.502595e-03
2189,FOODS_1_017_CA_1_validation,0.1,0.3,inf,inf,200.0,,7.122093e-05,1.819891e-04
2190,FOODS_1_017_CA_1_validation,0.1,0.4,inf,inf,200.0,,2.866802e-06,8.719039e-06
2191,FOODS_1_017_CA_1_validation,0.1,0.5,inf,inf,200.0,,6.648501e-08,2.334877e-07
...,...,...,...,...,...,...,...,...,...
103351,FOODS_3_827_CA_4_validation,0.9,0.5,,,,,0.000000e+00,0.000000e+00
103352,FOODS_3_827_CA_4_validation,0.9,0.6,,,,,0.000000e+00,0.000000e+00
103353,FOODS_3_827_CA_4_validation,0.9,0.7,,,,,0.000000e+00,0.000000e+00
103354,FOODS_3_827_CA_4_validation,0.9,0.8,,,,,0.000000e+00,0.000000e+00


In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['RMSE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE


In [None]:
df_result_metrics_all_params[df_result_metrics_all_params['MAE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE


# Metrics Statistics

In [None]:
list_metrics = ['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE']

In [None]:
for mts in list_metrics:
    print('Percentage of unexpected values of', mts, 'is: {}'.format(df_result_metrics_all_params[df_result_metrics_all_params[mts].isin([np.nan, np.inf, -np.inf])].Product.nunique() / df_result_metrics_all_params.Product.nunique() * 100), "%")

Percentage of unexpected values of MASE is: 16.231884057971012 %
Percentage of unexpected values of WMAPE is: 16.231884057971012 %
Percentage of unexpected values of SMAPE is: 14.782608695652174 %
Percentage of unexpected values of MAPE is: 16.231884057971012 %
Percentage of unexpected values of MAE is: 0.0 %
Percentage of unexpected values of RMSE is: 0.0 %


### Filter all rows that have unexpected metrics values

In [None]:
df_result_metrics_all_params = df_result_metrics_all_params[~df_result_metrics_all_params.isin([np.nan, np.inf, -np.inf]).any(1)]

### Get MEAN metrics value of each Alpha

In [None]:
df_result_metrics_all_params.groupby(['Alpha', 'Beta'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
Alpha,Beta,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.1,0.1,0.893883,136.954857,151.977668,75.936374,1.811616,2.809615
0.1,0.2,0.894088,136.711057,152.726576,79.355874,1.842813,2.830506
0.1,0.3,0.898909,137.421375,153.725579,81.982032,1.863421,2.858376
0.1,0.4,0.904850,138.396528,154.931642,84.520380,1.881931,2.889585
0.1,0.5,0.911960,139.559889,156.369213,87.243563,1.902089,2.924197
...,...,...,...,...,...,...,...
0.9,0.5,0.948327,143.873352,160.730647,96.613385,2.167571,3.434001
0.9,0.6,0.953495,144.718669,162.049320,99.063624,2.180371,3.483311
0.9,0.7,0.959730,145.728116,163.538487,101.657707,2.196343,3.538536
0.9,0.8,0.967458,146.954889,165.312516,104.561825,2.216953,3.601243


In [None]:
for mts in list_metrics:
    print('The optimum Alpha and Beta based on Mean', mts, 'is: {}'.format(df_result_metrics_all_params.groupby(['Alpha', 'Beta'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].mean()[mts].idxmin()))

The optimum Alpha and Beta based on Mean MASE is: (0.1, 0.1)
The optimum Alpha and Beta based on Mean WMAPE is: (0.2, 0.2)
The optimum Alpha and Beta based on Mean SMAPE is: (0.1, 0.1)
The optimum Alpha and Beta based on Mean MAPE is: (0.1, 0.1)
The optimum Alpha and Beta based on Mean MAE is: (0.1, 0.1)
The optimum Alpha and Beta based on Mean RMSE is: (0.1, 0.1)


### Get MEDIAN metrics value of each Alpha

In [None]:
df_result_metrics_all_params.groupby(['Alpha', 'Beta'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].median()

Unnamed: 0_level_0,Unnamed: 1_level_0,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
Alpha,Beta,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.1,0.1,0.816369,126.0255,155.6313,73.2490,1.103216,1.695754
0.1,0.2,0.835208,127.9487,156.2267,75.4930,1.113562,1.696705
0.1,0.3,0.846167,128.9402,157.4656,76.8477,1.129291,1.737629
0.1,0.4,0.859488,130.7429,159.2111,77.8138,1.150361,1.781210
0.1,0.5,0.865244,131.3801,161.5241,80.3091,1.179973,1.805350
...,...,...,...,...,...,...,...
0.9,0.5,0.931683,143.5877,166.3263,86.2130,1.249256,1.914583
0.9,0.6,0.937800,144.3650,168.1498,88.9885,1.255175,1.969154
0.9,0.7,0.946335,146.0083,170.2517,91.5975,1.276470,2.039939
0.9,0.8,0.955395,146.6933,172.3540,93.3184,1.308346,2.103322


In [None]:
for mts in list_metrics:
    print('The optimum Alpha and Beta based on Median of', mts, 'is: {}'.format(df_result_metrics_all_params.groupby(['Alpha', 'Beta'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].median()[mts].idxmin()))

The optimum Alpha and Beta based on Median of MASE is: (0.1, 0.1)
The optimum Alpha and Beta based on Median of WMAPE is: (0.1, 0.1)
The optimum Alpha and Beta based on Median of SMAPE is: (0.1, 0.1)
The optimum Alpha and Beta based on Median of MAPE is: (0.1, 0.1)
The optimum Alpha and Beta based on Median of MAE is: (0.2, 0.1)
The optimum Alpha and Beta based on Median of RMSE is: (0.1, 0.1)


# Run Croston model after decide best paramaters

In [10]:
best_alpha = 0.1
best_beta = 0.1

In [11]:
def CrostonTSB_test_data(best_alpha, best_beta, sales_pattern,
                         start_train_date, end_train_date, 
                         n_pred_days, num_train_needed, validation_training_ratio):
    
    sales_pattern_py = sales_pattern.copy()
    list_pattern_py = sales_pattern_py.id.unique().tolist()
    
    sales_pattern_py = sales_pattern_py.drop(['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'], axis=1)
    df_pattern = sales_pattern_py.melt(['id'], var_name='Date').pivot(index = ['Date'], columns = 'id', values = 'value').reset_index()
    df_pattern.drop('Date', axis=1, inplace=True)
    df_pattern.index = pd.date_range('2011-01-29', periods=df_pattern.shape[0], freq="D")
    df_pattern.index.names = ['Date']
    df_pattern = df_pattern.astype('float64')
    
    train_data = df_pattern[(df_pattern.index >= start_train_date) & (df_pattern.index <= end_train_date)]
    test_data = df_pattern.iloc[n_pred_days*-1:]
    
    df_result_best_params = pd.DataFrame()
    for product in list_pattern_py:
        train_product = train_data[product]
        test_product = test_data[product]
        predictions = list()
        history = [x for x in train_product[num_train_needed*-1:]]
        for t in range(len(test_product)):
            yhat = Croston_TSB(history, extra_periods=1, alpha=best_alpha, beta = best_beta)['Forecast'].iloc[-1]
            predictions.append(yhat)
            history.append(test_product[t])
        df_result_temp = pd.DataFrame({'Product': [product for count in range(len(test_product))],
                                        'Actual Data': test_product,
                                        'Forecast': predictions})
        df_result_best_params = df_result_best_params.append(df_result_temp, ignore_index=True)
            
    return df_result_best_params

In [12]:
start = timer()

pattern_df_result_best_params = CrostonTSB_test_data(best_alpha, best_beta, sales_lumpy, start_train_date, end_train_date, n_pred_days, num_train_needed, validation_training_ratio)

end = timer()

print('This line of code took {} minutes'.format((end-start) / 60))

This line of code took 0.0824766407333333 minutes


In [13]:
df_result_final = pattern_df_result_best_params.groupby('Product').apply(get_metrics_result_all_params).reset_index()

In [14]:
df_result_final

Unnamed: 0,Product,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
0,HOBBIES_1_006_CA_1_validation,1.165852,163.2193,167.4653,59.6115,1.165852,1.225511
1,HOBBIES_1_006_CA_2_validation,1.282652,207.1976,176.8679,61.2683,0.591993,0.663670
2,HOBBIES_1_008_CA_2_validation,0.626158,57.5005,61.2766,53.4810,5.298256,8.668080
3,HOBBIES_1_008_CA_4_validation,0.553864,65.5578,69.1627,107.7118,5.197796,8.476727
4,HOBBIES_1_009_CA_1_validation,0.531394,105.7834,142.2381,59.7406,2.493466,4.632508
...,...,...,...,...,...,...,...
340,HOBBIES_2_148_CA_2_validation,1.774668,191.1181,199.1100,96.7850,0.136513,0.269324
341,HOBBIES_2_148_CA_3_validation,0.964382,207.7131,198.9662,96.2459,0.148367,0.272202
342,HOBBIES_2_149_CA_1_validation,0.616134,132.7057,198.5663,97.3848,0.284369,0.595904
343,HOBBIES_2_149_CA_2_validation,inf,inf,200.0000,,0.105064,0.113837


In [15]:
df_result_final.to_csv('CrostonTSB_Lumpy_Test_Data.csv')