# Import necessary packages

In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_error, mean_squared_error

from timeit import default_timer as timer

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import original data and preprocessed data

In [4]:
sales_train_val = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/sales_train_validation.csv')

In [5]:
sales_train_val = sales_train_val[(sales_train_val['state_id'] == 'CA') & (sales_train_val['cat_id'] == 'HOUSEHOLD')]

In [6]:
INPUT_DIR_2 = '/content/drive/MyDrive/Colab Notebooks/List of Product ID according to 4 demand patterns/California/Household/2-year'
list_intermittent = pd.read_csv(f'{INPUT_DIR_2}/Intermittent_ID_2_Year_Data.csv')
list_lumpy = pd.read_csv(f'{INPUT_DIR_2}/Lumpy_ID_2_Year_Data.csv')
list_erratic = pd.read_csv(f'{INPUT_DIR_2}/Erratic_ID_2_Year_Data.csv')
list_smooth = pd.read_csv(f'{INPUT_DIR_2}/Smooth_ID_2_Year_Data.csv')

list_intermittent = list_intermittent['0'].values.tolist()
list_lumpy = list_lumpy['0'].values.tolist()
list_erratic = list_erratic['0'].values.tolist()
list_smooth = list_smooth['0'].values.tolist()

sales_intermittent = sales_train_val[sales_train_val.id.isin(list_intermittent)]
sales_lumpy = sales_train_val[sales_train_val.id.isin(list_lumpy)]
sales_erratic = sales_train_val[sales_train_val.id.isin(list_erratic)]
sales_smooth = sales_train_val[sales_train_val.id.isin(list_smooth)]

In [11]:
sales_intermittent

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
565,HOUSEHOLD_1_001_CA_1_validation,HOUSEHOLD_1_001,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,3,0,0,0,...,1,0,1,1,1,0,0,0,0,1
566,HOUSEHOLD_1_002_CA_1_validation,HOUSEHOLD_1_002,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,1,1,2,4,...,0,0,0,0,0,1,0,0,0,1
567,HOUSEHOLD_1_003_CA_1_validation,HOUSEHOLD_1_003,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,0,0,0,0,...,0,0,1,2,1,0,0,0,1,0
569,HOUSEHOLD_1_005_CA_1_validation,HOUSEHOLD_1_005,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,0,3,2,2,...,1,0,3,1,2,0,0,1,0,1
570,HOUSEHOLD_1_006_CA_1_validation,HOUSEHOLD_1_006,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,0,0,0,0,...,3,4,2,6,3,2,2,1,3,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10754,HOUSEHOLD_2_512_CA_4_validation,HOUSEHOLD_2_512,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,0,0,0,2,1,0,0,0,0
10755,HOUSEHOLD_2_513_CA_4_validation,HOUSEHOLD_2_513,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,2,0,0,0,0,0,0,0,0,0
10756,HOUSEHOLD_2_514_CA_4_validation,HOUSEHOLD_2_514,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,2,0,1,...,0,0,1,0,0,0,0,0,0,0
10757,HOUSEHOLD_2_515_CA_4_validation,HOUSEHOLD_2_515,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,2,0


# User-defined functions to calculate Metrics and Croston_TSB algorithm

In [7]:
ROUNDING_DECIMAL = 4

def mase_calculation(ts, prediction):
    divisor = 0
    for i in range(1, ts.shape[0]):
        divisor = divisor + abs(ts.iloc[i] - ts.iloc[i-1])
    divisor = divisor/(ts.shape[0] - 1)
    diff    = abs(ts - prediction[:ts.shape[0]])/divisor
    mase    = diff.mean()
    return mase

def mape_calculation(actual, pred): 
    if not all([isinstance(actual, np.ndarray), isinstance(pred, np.ndarray)]):
        actual, pred = np.array(actual), np.array(pred)
    mask = (actual != 0)
    return round((np.fabs(actual - pred)/actual)[mask].mean()*100, ROUNDING_DECIMAL)

def wmape_calculation(actual, pred):
    if not all([isinstance(actual, np.ndarray), isinstance(pred, np.ndarray)]):
        actual, pred = np.array(actual), np.array(pred)
    return round((np.sum(np.absolute(actual-pred))/np.sum(actual))*100, ROUNDING_DECIMAL)

def smape_calculation(actual, predicted):
    if not all([isinstance(actual, np.ndarray), isinstance(predicted, np.ndarray)]):
        actual, predicted = np.array(actual), np.array(predicted)
    return round(np.mean(np.abs(predicted - actual) / ((np.abs(predicted) + np.abs(actual))/2))*100, ROUNDING_DECIMAL)

In [8]:
def Croston_TSB(ts,extra_periods=1,alpha=0.4,beta=0.4):
    d = np.array(ts) # Transform the input into a numpy array
    cols = len(d) # Historical period length
    d = np.append(d,[np.nan]*extra_periods) # Append np.nan into the demand array to cover future periods
    
    #level (a), probability(p) and forecast (f)
    a,p,f = np.full((3,cols+extra_periods),np.nan)
# Initialization
    first_occurence = np.argmax(d[:cols]>0)
    a[0] = d[first_occurence]
    p[0] = 1/(1 + first_occurence)
    f[0] = p[0]*a[0]
                 
    # Create all the t+1 forecasts
    for t in range(0,cols): 
        if d[t] > 0:
            a[t+1] = alpha*d[t] + (1-alpha)*a[t] 
            p[t+1] = beta*(1) + (1-beta)*p[t]  
        else:
            a[t+1] = a[t]
            p[t+1] = (1-beta)*p[t]       
        f[t+1] = p[t+1]*a[t+1]
        
    # Future Forecast
    a[cols+1:cols+extra_periods] = a[cols]
    p[cols+1:cols+extra_periods] = p[cols]
    f[cols+1:cols+extra_periods] = f[cols]
                      
    df = pd.DataFrame.from_dict({"Demand":d,"Forecast":f,"Period":p,"Level":a,"Error":d-f})
    return df

# Set value for parameters

In [9]:
list_params_alpha = [round(item, 1) for item in list(np.arange(0.1, 1, 0.1))]
list_params_beta = [round(item, 1) for item in list(np.arange(0.1, 1, 0.1))]

In [10]:
start_train_date = '2014-04-11'
end_train_date = '2016-04-10'
n_pred_days = 14
num_train_needed = 100
validation_training_ratio = 0.95

# Pipeline for Croston_TSB

In [12]:
def CrostonTSB_output_all_params(sales_pattern, list_params_alpha, list_params_beta, 
                                 start_train_date, end_train_date, n_pred_days, 
                                 num_train_needed, validation_training_ratio):
    
    sales_pattern_py = sales_pattern.copy()
    list_pattern_py = sales_pattern_py.id.unique().tolist()
    
    sales_pattern_py = sales_pattern_py.drop(['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'], axis=1)
    df_pattern = sales_pattern_py.melt(['id'], var_name='Date').pivot(index = ['Date'], columns = 'id', values = 'value').reset_index()
    df_pattern.drop('Date', axis=1, inplace=True)
    df_pattern.index = pd.date_range('2011-01-29', periods=df_pattern.shape[0], freq="D")
    df_pattern.index.names = ['Date']
    df_pattern = df_pattern.astype('float64')
    
    train_data = df_pattern[(df_pattern.index >= start_train_date) & (df_pattern.index <= end_train_date)]
    test_data = df_pattern.iloc[n_pred_days*-1:]
    train_data_first_part = train_data.iloc[:int(len(train_data)*validation_training_ratio), :]
    train_data_second_part = train_data.iloc[int(len(train_data)*validation_training_ratio):, :]
    
    df_result_all_params = pd.DataFrame()
    for product in list_pattern_py:
        train_first = train_data_first_part[product]
        train_second = train_data_second_part[product]
        for i in list_params_alpha:
            for j in list_params_beta:
                predictions = list()
                history = [x for x in train_first[num_train_needed*-1:]]
                for t in range(len(train_second)):
                    yhat = Croston_TSB(history, extra_periods=1, alpha = i, beta = j)['Forecast'].iloc[-1]
                    predictions.append(yhat)
                    history.append(train_second[t])
                df_result_temp = pd.DataFrame({'Product': [product for count in range(len(train_second))],
                                               'Actual Data': train_second,
                                               'Forecast': predictions,
                                               'Alpha': [i for count_i in range(len(train_second))],
                                               'Beta': [j for count_j in range(len(train_second))]})
                df_result_all_params = df_result_all_params.append(df_result_temp, ignore_index=True)
            
    return df_result_all_params

In [13]:
sales_intermittent

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
565,HOUSEHOLD_1_001_CA_1_validation,HOUSEHOLD_1_001,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,3,0,0,0,...,1,0,1,1,1,0,0,0,0,1
566,HOUSEHOLD_1_002_CA_1_validation,HOUSEHOLD_1_002,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,1,1,2,4,...,0,0,0,0,0,1,0,0,0,1
567,HOUSEHOLD_1_003_CA_1_validation,HOUSEHOLD_1_003,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,0,0,0,0,...,0,0,1,2,1,0,0,0,1,0
569,HOUSEHOLD_1_005_CA_1_validation,HOUSEHOLD_1_005,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,0,3,2,2,...,1,0,3,1,2,0,0,1,0,1
570,HOUSEHOLD_1_006_CA_1_validation,HOUSEHOLD_1_006,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,0,0,0,0,...,3,4,2,6,3,2,2,1,3,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10754,HOUSEHOLD_2_512_CA_4_validation,HOUSEHOLD_2_512,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,0,0,0,2,1,0,0,0,0
10755,HOUSEHOLD_2_513_CA_4_validation,HOUSEHOLD_2_513,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,2,0,0,0,0,0,0,0,0,0
10756,HOUSEHOLD_2_514_CA_4_validation,HOUSEHOLD_2_514,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,2,0,1,...,0,0,1,0,0,0,0,0,0,0
10757,HOUSEHOLD_2_515_CA_4_validation,HOUSEHOLD_2_515,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,2,0


In [14]:
sales_intermittent.iloc[:500]

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
565,HOUSEHOLD_1_001_CA_1_validation,HOUSEHOLD_1_001,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,3,0,0,0,...,1,0,1,1,1,0,0,0,0,1
566,HOUSEHOLD_1_002_CA_1_validation,HOUSEHOLD_1_002,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,1,1,2,4,...,0,0,0,0,0,1,0,0,0,1
567,HOUSEHOLD_1_003_CA_1_validation,HOUSEHOLD_1_003,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,0,0,0,0,...,0,0,1,2,1,0,0,0,1,0
569,HOUSEHOLD_1_005_CA_1_validation,HOUSEHOLD_1_005,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,0,3,2,2,...,1,0,3,1,2,0,0,1,0,1
570,HOUSEHOLD_1_006_CA_1_validation,HOUSEHOLD_1_006,HOUSEHOLD_1,HOUSEHOLD,CA_1,CA,0,0,0,0,...,3,4,2,6,3,2,2,1,3,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1203,HOUSEHOLD_2_107_CA_1_validation,HOUSEHOLD_2_107,HOUSEHOLD_2,HOUSEHOLD,CA_1,CA,0,0,0,0,...,0,0,0,0,1,1,1,0,0,0
1204,HOUSEHOLD_2_108_CA_1_validation,HOUSEHOLD_2_108,HOUSEHOLD_2,HOUSEHOLD,CA_1,CA,2,3,2,2,...,0,0,0,0,0,0,0,0,0,0
1205,HOUSEHOLD_2_109_CA_1_validation,HOUSEHOLD_2_109,HOUSEHOLD_2,HOUSEHOLD,CA_1,CA,0,0,0,0,...,0,0,2,1,0,1,0,3,0,3
1206,HOUSEHOLD_2_110_CA_1_validation,HOUSEHOLD_2_110,HOUSEHOLD_2,HOUSEHOLD,CA_1,CA,0,0,0,0,...,2,2,6,3,2,0,2,1,7,4


In [15]:
sales_intermittent.iloc[500:1000]

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
1209,HOUSEHOLD_2_113_CA_1_validation,HOUSEHOLD_2_113,HOUSEHOLD_2,HOUSEHOLD,CA_1,CA,0,0,1,0,...,0,0,0,1,1,0,0,0,1,2
1210,HOUSEHOLD_2_114_CA_1_validation,HOUSEHOLD_2_114,HOUSEHOLD_2,HOUSEHOLD,CA_1,CA,0,0,0,0,...,0,0,0,0,0,1,1,1,1,1
1212,HOUSEHOLD_2_116_CA_1_validation,HOUSEHOLD_2_116,HOUSEHOLD_2,HOUSEHOLD,CA_1,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1213,HOUSEHOLD_2_117_CA_1_validation,HOUSEHOLD_2_117,HOUSEHOLD_2,HOUSEHOLD,CA_1,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1214,HOUSEHOLD_2_118_CA_1_validation,HOUSEHOLD_2_118,HOUSEHOLD_2,HOUSEHOLD,CA_1,CA,0,0,0,0,...,0,0,0,0,1,1,0,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3753,HOUSEHOLD_1_144_CA_2_validation,HOUSEHOLD_1_144,HOUSEHOLD_1,HOUSEHOLD,CA_2,CA,0,0,0,0,...,0,2,0,1,0,0,0,0,0,0
3754,HOUSEHOLD_1_145_CA_2_validation,HOUSEHOLD_1_145,HOUSEHOLD_1,HOUSEHOLD,CA_2,CA,6,5,3,2,...,1,1,3,6,4,2,3,3,7,1
3755,HOUSEHOLD_1_146_CA_2_validation,HOUSEHOLD_1_146,HOUSEHOLD_1,HOUSEHOLD,CA_2,CA,2,2,2,1,...,2,1,1,1,1,0,1,0,4,2
3757,HOUSEHOLD_1_148_CA_2_validation,HOUSEHOLD_1_148,HOUSEHOLD_1,HOUSEHOLD,CA_2,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
sales_intermittent.iloc[1000:1500]

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
3761,HOUSEHOLD_1_152_CA_2_validation,HOUSEHOLD_1_152,HOUSEHOLD_1,HOUSEHOLD,CA_2,CA,0,0,0,0,...,0,0,6,0,0,1,1,2,1,2
3763,HOUSEHOLD_1_154_CA_2_validation,HOUSEHOLD_1_154,HOUSEHOLD_1,HOUSEHOLD,CA_2,CA,3,4,2,3,...,0,0,0,0,0,0,0,0,1,1
3764,HOUSEHOLD_1_155_CA_2_validation,HOUSEHOLD_1_155,HOUSEHOLD_1,HOUSEHOLD,CA_2,CA,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3766,HOUSEHOLD_1_157_CA_2_validation,HOUSEHOLD_1_157,HOUSEHOLD_1,HOUSEHOLD,CA_2,CA,0,0,0,0,...,1,3,4,1,5,6,2,0,3,2
3767,HOUSEHOLD_1_158_CA_2_validation,HOUSEHOLD_1_158,HOUSEHOLD_1,HOUSEHOLD,CA_2,CA,0,0,0,0,...,0,1,4,0,1,1,0,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4381,HOUSEHOLD_2_237_CA_2_validation,HOUSEHOLD_2_237,HOUSEHOLD_2,HOUSEHOLD,CA_2,CA,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
4382,HOUSEHOLD_2_238_CA_2_validation,HOUSEHOLD_2_238,HOUSEHOLD_2,HOUSEHOLD,CA_2,CA,0,2,0,0,...,1,3,2,0,1,0,0,1,0,1
4383,HOUSEHOLD_2_239_CA_2_validation,HOUSEHOLD_2_239,HOUSEHOLD_2,HOUSEHOLD,CA_2,CA,0,4,0,0,...,0,6,0,0,0,0,0,0,0,0
4385,HOUSEHOLD_2_241_CA_2_validation,HOUSEHOLD_2_241,HOUSEHOLD_2,HOUSEHOLD,CA_2,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
sales_intermittent.iloc[1500:2000]

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
4387,HOUSEHOLD_2_243_CA_2_validation,HOUSEHOLD_2_243,HOUSEHOLD_2,HOUSEHOLD,CA_2,CA,0,0,2,0,...,0,0,0,0,0,0,0,0,1,0
4388,HOUSEHOLD_2_244_CA_2_validation,HOUSEHOLD_2_244,HOUSEHOLD_2,HOUSEHOLD,CA_2,CA,0,0,0,0,...,0,1,2,0,0,0,1,0,0,1
4389,HOUSEHOLD_2_245_CA_2_validation,HOUSEHOLD_2_245,HOUSEHOLD_2,HOUSEHOLD,CA_2,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4390,HOUSEHOLD_2_246_CA_2_validation,HOUSEHOLD_2_246,HOUSEHOLD_2,HOUSEHOLD,CA_2,CA,0,0,2,0,...,0,0,0,0,1,0,0,1,0,0
4391,HOUSEHOLD_2_247_CA_2_validation,HOUSEHOLD_2_247,HOUSEHOLD_2,HOUSEHOLD,CA_2,CA,0,0,1,0,...,1,2,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7095,HOUSEHOLD_1_442_CA_3_validation,HOUSEHOLD_1_442,HOUSEHOLD_1,HOUSEHOLD,CA_3,CA,0,0,0,0,...,2,0,3,1,1,0,1,3,0,0
7097,HOUSEHOLD_1_444_CA_3_validation,HOUSEHOLD_1_444,HOUSEHOLD_1,HOUSEHOLD,CA_3,CA,0,0,0,0,...,0,1,1,1,0,2,2,1,2,0
7099,HOUSEHOLD_1_446_CA_3_validation,HOUSEHOLD_1_446,HOUSEHOLD_1,HOUSEHOLD,CA_3,CA,0,0,0,1,...,0,1,1,1,2,0,0,0,2,0
7103,HOUSEHOLD_1_450_CA_3_validation,HOUSEHOLD_1_450,HOUSEHOLD_1,HOUSEHOLD,CA_3,CA,0,0,0,0,...,2,2,4,1,3,1,2,0,0,0


In [18]:
sales_intermittent.iloc[2000:2500]

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
7105,HOUSEHOLD_1_452_CA_3_validation,HOUSEHOLD_1_452,HOUSEHOLD_1,HOUSEHOLD,CA_3,CA,5,1,3,0,...,1,1,0,0,0,0,0,3,1,1
7107,HOUSEHOLD_1_454_CA_3_validation,HOUSEHOLD_1_454,HOUSEHOLD_1,HOUSEHOLD,CA_3,CA,0,0,0,0,...,0,0,0,0,0,3,1,0,1,2
7111,HOUSEHOLD_1_458_CA_3_validation,HOUSEHOLD_1_458,HOUSEHOLD_1,HOUSEHOLD,CA_3,CA,0,0,0,0,...,4,5,1,3,1,0,4,2,2,2
7113,HOUSEHOLD_1_460_CA_3_validation,HOUSEHOLD_1_460,HOUSEHOLD_1,HOUSEHOLD,CA_3,CA,2,5,5,4,...,0,1,1,1,0,0,1,0,1,4
7116,HOUSEHOLD_1_463_CA_3_validation,HOUSEHOLD_1_463,HOUSEHOLD_1,HOUSEHOLD,CA_3,CA,0,0,0,0,...,0,0,0,1,3,1,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7701,HOUSEHOLD_2_508_CA_3_validation,HOUSEHOLD_2_508,HOUSEHOLD_2,HOUSEHOLD,CA_3,CA,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
7702,HOUSEHOLD_2_509_CA_3_validation,HOUSEHOLD_2_509,HOUSEHOLD_2,HOUSEHOLD,CA_3,CA,1,0,4,3,...,1,3,0,2,1,1,0,2,3,4
7704,HOUSEHOLD_2_511_CA_3_validation,HOUSEHOLD_2_511,HOUSEHOLD_2,HOUSEHOLD,CA_3,CA,5,9,1,4,...,0,0,1,0,0,0,0,1,2,0
7705,HOUSEHOLD_2_512_CA_3_validation,HOUSEHOLD_2_512,HOUSEHOLD_2,HOUSEHOLD,CA_3,CA,0,0,0,0,...,1,2,0,0,6,0,5,1,3,2


In [19]:
sales_intermittent.iloc[2500:3000]

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
7708,HOUSEHOLD_2_515_CA_3_validation,HOUSEHOLD_2_515,HOUSEHOLD_2,HOUSEHOLD,CA_3,CA,0,0,0,0,...,1,1,1,0,0,0,0,0,0,0
7709,HOUSEHOLD_2_516_CA_3_validation,HOUSEHOLD_2_516,HOUSEHOLD_2,HOUSEHOLD,CA_3,CA,0,1,0,1,...,1,0,0,0,0,0,0,0,0,0
9712,HOUSEHOLD_1_001_CA_4_validation,HOUSEHOLD_1_001,HOUSEHOLD_1,HOUSEHOLD,CA_4,CA,1,1,0,1,...,0,1,1,0,0,1,1,0,2,0
9713,HOUSEHOLD_1_002_CA_4_validation,HOUSEHOLD_1_002,HOUSEHOLD_1,HOUSEHOLD,CA_4,CA,0,1,0,2,...,0,2,1,1,1,1,3,1,1,0
9714,HOUSEHOLD_1_003_CA_4_validation,HOUSEHOLD_1_003,HOUSEHOLD_1,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,0,1,0,0,0,0,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10245,HOUSEHOLD_2_002_CA_4_validation,HOUSEHOLD_2_002,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,2,1,0,...,0,0,0,0,0,0,0,2,0,1
10246,HOUSEHOLD_2_003_CA_4_validation,HOUSEHOLD_2_003,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,1,1,0,1,0,0,0,1,0,0
10247,HOUSEHOLD_2_004_CA_4_validation,HOUSEHOLD_2_004,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
10248,HOUSEHOLD_2_005_CA_4_validation,HOUSEHOLD_2_005,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,1,0,0,0,0,1,0,0,0


In [20]:
sales_intermittent.iloc[3000:]

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
10250,HOUSEHOLD_2_007_CA_4_validation,HOUSEHOLD_2_007,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10251,HOUSEHOLD_2_008_CA_4_validation,HOUSEHOLD_2_008,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10252,HOUSEHOLD_2_009_CA_4_validation,HOUSEHOLD_2_009,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,1,0,0,0,0,0,1,0,1
10253,HOUSEHOLD_2_010_CA_4_validation,HOUSEHOLD_2_010,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,1,0,1,0,...,0,0,0,0,0,0,0,0,1,0
10254,HOUSEHOLD_2_011_CA_4_validation,HOUSEHOLD_2_011,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,0,2,0,0,1,0,1,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10754,HOUSEHOLD_2_512_CA_4_validation,HOUSEHOLD_2_512,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,0,0,0,2,1,0,0,0,0
10755,HOUSEHOLD_2_513_CA_4_validation,HOUSEHOLD_2_513,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,2,0,0,0,0,0,0,0,0,0
10756,HOUSEHOLD_2_514_CA_4_validation,HOUSEHOLD_2_514,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,2,0,1,...,0,0,1,0,0,0,0,0,0,0
10757,HOUSEHOLD_2_515_CA_4_validation,HOUSEHOLD_2_515,HOUSEHOLD_2,HOUSEHOLD,CA_4,CA,0,0,0,0,...,0,0,0,0,0,0,0,0,2,0


In [21]:
start = timer()

pattern_df_result_all_params = CrostonTSB_output_all_params(sales_intermittent.iloc[3000:],
                                                          list_params_alpha,
                                                          list_params_beta,
                                                          start_train_date,
                                                          end_train_date, 
                                                          n_pred_days,
                                                          num_train_needed,
                                                          validation_training_ratio)

end = timer()

print('This line of code took {} minutes'.format((end-start) / 60))

This line of code took 28.591983576233336 minutes


In [22]:
pattern_df_result_all_params

Unnamed: 0,Product,Actual Data,Forecast,Alpha,Beta
0,HOUSEHOLD_2_007_CA_4_validation,0.0,1.904884e-01,0.1,0.1
1,HOUSEHOLD_2_007_CA_4_validation,1.0,1.714396e-01,0.1,0.1
2,HOUSEHOLD_2_007_CA_4_validation,0.0,2.638891e-01,0.1,0.1
3,HOUSEHOLD_2_007_CA_4_validation,1.0,2.375002e-01,0.1,0.1
4,HOUSEHOLD_2_007_CA_4_validation,0.0,3.217624e-01,0.1,0.1
...,...,...,...,...,...
1468525,HOUSEHOLD_2_516_CA_4_validation,0.0,9.081000e-27,0.9,0.9
1468526,HOUSEHOLD_2_516_CA_4_validation,0.0,9.081000e-28,0.9,0.9
1468527,HOUSEHOLD_2_516_CA_4_validation,0.0,9.081000e-29,0.9,0.9
1468528,HOUSEHOLD_2_516_CA_4_validation,0.0,9.081000e-30,0.9,0.9


In [23]:
pattern_df_result_all_params.to_csv('CrostonTSB_Intermittent_3000_End_All_Parameters_Hyperparameter_Tuning_Data.csv')

# Summay all metrics

In [26]:
def get_metrics_result_all_params(data):
    metrics_df = {}
    metrics_df['MASE'] = mase_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['WMAPE'] = wmape_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['SMAPE'] = smape_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['MAPE'] = mape_calculation(data['Actual Data'], data['Forecast'])
    metrics_df['MAE'] = mean_absolute_error(data['Actual Data'], data['Forecast'])
    metrics_df['RMSE'] = np.sqrt(mean_squared_error(data['Actual Data'], data['Forecast']))
    return pd.Series(metrics_df)

In [27]:
start = timer()

df_result_metrics_all_params = pattern_df_result_all_params.groupby(['Product', 'Alpha', 'Beta']).apply(get_metrics_result_all_params).reset_index()

end = timer()
print('This line of code took {} minutes'.format((end-start) / 60))

This line of code took 1.6533649063166649 minutes


In [28]:
df_result_metrics_all_params

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
0,HOUSEHOLD_2_007_CA_4_validation,0.1,0.1,0.878456,180.5716,191.3379,84.2920,0.292819,0.385272
1,HOUSEHOLD_2_007_CA_4_validation,0.1,0.2,0.880690,181.0308,193.0132,87.3893,0.293563,0.401861
2,HOUSEHOLD_2_007_CA_4_validation,0.1,0.3,0.895469,184.0687,194.2329,89.5206,0.298490,0.419331
3,HOUSEHOLD_2_007_CA_4_validation,0.1,0.4,0.911082,187.2780,195.2750,91.3054,0.303694,0.437885
4,HOUSEHOLD_2_007_CA_4_validation,0.1,0.5,0.925791,190.3016,196.1807,92.8824,0.308597,0.457848
...,...,...,...,...,...,...,...,...,...
39685,HOUSEHOLD_2_516_CA_4_validation,0.9,0.5,0.979356,201.3120,199.9993,99.9933,0.054409,0.190254
39686,HOUSEHOLD_2_516_CA_4_validation,0.9,0.6,0.977694,200.9705,200.0000,99.9996,0.054316,0.197026
39687,HOUSEHOLD_2_516_CA_4_validation,0.9,0.7,0.977386,200.9071,200.0000,100.0000,0.054299,0.204556
39688,HOUSEHOLD_2_516_CA_4_validation,0.9,0.8,0.977353,200.9003,200.0000,100.0000,0.054297,0.213004


# Check what products has unexpected metrics result

In [29]:
df_result_metrics_all_params[df_result_metrics_all_params['MASE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
405,HOUSEHOLD_2_012_CA_4_validation,0.1,0.1,inf,inf,200.0,,5.571667e-03,7.934434e-03
406,HOUSEHOLD_2_012_CA_4_validation,0.1,0.2,inf,inf,200.0,,2.989552e-04,6.063152e-04
407,HOUSEHOLD_2_012_CA_4_validation,0.1,0.3,inf,inf,200.0,,1.688979e-05,4.315808e-05
408,HOUSEHOLD_2_012_CA_4_validation,0.1,0.4,inf,inf,200.0,,6.569460e-07,1.998023e-06
409,HOUSEHOLD_2_012_CA_4_validation,0.1,0.5,inf,inf,200.0,,1.426230e-08,5.008757e-08
...,...,...,...,...,...,...,...,...,...
39604,HOUSEHOLD_2_515_CA_4_validation,0.9,0.5,,,,,0.000000e+00,0.000000e+00
39605,HOUSEHOLD_2_515_CA_4_validation,0.9,0.6,,,,,0.000000e+00,0.000000e+00
39606,HOUSEHOLD_2_515_CA_4_validation,0.9,0.7,,,,,0.000000e+00,0.000000e+00
39607,HOUSEHOLD_2_515_CA_4_validation,0.9,0.8,,,,,0.000000e+00,0.000000e+00


In [30]:
df_result_metrics_all_params[df_result_metrics_all_params['WMAPE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
405,HOUSEHOLD_2_012_CA_4_validation,0.1,0.1,inf,inf,200.0,,5.571667e-03,7.934434e-03
406,HOUSEHOLD_2_012_CA_4_validation,0.1,0.2,inf,inf,200.0,,2.989552e-04,6.063152e-04
407,HOUSEHOLD_2_012_CA_4_validation,0.1,0.3,inf,inf,200.0,,1.688979e-05,4.315808e-05
408,HOUSEHOLD_2_012_CA_4_validation,0.1,0.4,inf,inf,200.0,,6.569460e-07,1.998023e-06
409,HOUSEHOLD_2_012_CA_4_validation,0.1,0.5,inf,inf,200.0,,1.426230e-08,5.008757e-08
...,...,...,...,...,...,...,...,...,...
39604,HOUSEHOLD_2_515_CA_4_validation,0.9,0.5,,,,,0.000000e+00,0.000000e+00
39605,HOUSEHOLD_2_515_CA_4_validation,0.9,0.6,,,,,0.000000e+00,0.000000e+00
39606,HOUSEHOLD_2_515_CA_4_validation,0.9,0.7,,,,,0.000000e+00,0.000000e+00
39607,HOUSEHOLD_2_515_CA_4_validation,0.9,0.8,,,,,0.000000e+00,0.000000e+00


In [31]:
df_result_metrics_all_params[df_result_metrics_all_params['SMAPE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
810,HOUSEHOLD_2_017_CA_4_validation,0.1,0.1,,,,,0.0,0.0
811,HOUSEHOLD_2_017_CA_4_validation,0.1,0.2,,,,,0.0,0.0
812,HOUSEHOLD_2_017_CA_4_validation,0.1,0.3,,,,,0.0,0.0
813,HOUSEHOLD_2_017_CA_4_validation,0.1,0.4,,,,,0.0,0.0
814,HOUSEHOLD_2_017_CA_4_validation,0.1,0.5,,,,,0.0,0.0
...,...,...,...,...,...,...,...,...,...
39604,HOUSEHOLD_2_515_CA_4_validation,0.9,0.5,,,,,0.0,0.0
39605,HOUSEHOLD_2_515_CA_4_validation,0.9,0.6,,,,,0.0,0.0
39606,HOUSEHOLD_2_515_CA_4_validation,0.9,0.7,,,,,0.0,0.0
39607,HOUSEHOLD_2_515_CA_4_validation,0.9,0.8,,,,,0.0,0.0


In [32]:
df_result_metrics_all_params[df_result_metrics_all_params['MAPE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
405,HOUSEHOLD_2_012_CA_4_validation,0.1,0.1,inf,inf,200.0,,5.571667e-03,7.934434e-03
406,HOUSEHOLD_2_012_CA_4_validation,0.1,0.2,inf,inf,200.0,,2.989552e-04,6.063152e-04
407,HOUSEHOLD_2_012_CA_4_validation,0.1,0.3,inf,inf,200.0,,1.688979e-05,4.315808e-05
408,HOUSEHOLD_2_012_CA_4_validation,0.1,0.4,inf,inf,200.0,,6.569460e-07,1.998023e-06
409,HOUSEHOLD_2_012_CA_4_validation,0.1,0.5,inf,inf,200.0,,1.426230e-08,5.008757e-08
...,...,...,...,...,...,...,...,...,...
39604,HOUSEHOLD_2_515_CA_4_validation,0.9,0.5,,,,,0.000000e+00,0.000000e+00
39605,HOUSEHOLD_2_515_CA_4_validation,0.9,0.6,,,,,0.000000e+00,0.000000e+00
39606,HOUSEHOLD_2_515_CA_4_validation,0.9,0.7,,,,,0.000000e+00,0.000000e+00
39607,HOUSEHOLD_2_515_CA_4_validation,0.9,0.8,,,,,0.000000e+00,0.000000e+00


In [33]:
df_result_metrics_all_params[df_result_metrics_all_params['RMSE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE


In [34]:
df_result_metrics_all_params[df_result_metrics_all_params['MAE'].isin([np.inf, -np.inf, np.nan])]

Unnamed: 0,Product,Alpha,Beta,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE


# Metrics Statistics

In [35]:
list_metrics = ['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE']

In [36]:
for mts in list_metrics:
    print('Percentage of unexpected values of', mts, 'is: {}'.format(df_result_metrics_all_params[df_result_metrics_all_params[mts].isin([np.nan, np.inf, -np.inf])].Product.nunique() / df_result_metrics_all_params.Product.nunique() * 100), "%")

Percentage of unexpected values of MASE is: 27.55102040816326 %
Percentage of unexpected values of WMAPE is: 27.55102040816326 %
Percentage of unexpected values of SMAPE is: 20.0 %
Percentage of unexpected values of MAPE is: 27.55102040816326 %
Percentage of unexpected values of MAE is: 0.0 %
Percentage of unexpected values of RMSE is: 0.0 %


### Filter all rows that have unexpected metrics values

In [37]:
df_result_metrics_all_params = df_result_metrics_all_params[~df_result_metrics_all_params.isin([np.nan, np.inf, -np.inf]).any(1)]

### Get MEAN metrics value of each Alpha

In [38]:
df_result_metrics_all_params.groupby(['Alpha', 'Beta'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
Alpha,Beta,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.1,0.1,1.022506,172.434140,184.542284,80.776270,0.347211,0.489886
0.1,0.2,1.008843,170.142937,185.062615,81.292397,0.345398,0.500558
0.1,0.3,1.001627,169.095871,185.509664,81.461807,0.344563,0.512319
0.1,0.4,0.996580,168.475864,185.985674,81.621160,0.344168,0.525187
0.1,0.5,0.993236,168.165651,186.535526,81.894511,0.344390,0.539412
...,...,...,...,...,...,...,...
0.9,0.5,0.996567,168.483770,187.184400,83.139424,0.351072,0.552902
0.9,0.6,0.996474,168.682621,187.851562,83.909027,0.352659,0.570017
0.9,0.7,0.997591,169.077274,188.588381,84.810465,0.354670,0.589219
0.9,0.8,0.999570,169.612747,189.415059,85.792310,0.357090,0.610926


In [39]:
for mts in list_metrics:
    print('The optimum Alpha and Beta based on Mean', mts, 'is: {}'.format(df_result_metrics_all_params.groupby(['Alpha', 'Beta'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].mean()[mts].idxmin()))

The optimum Alpha and Beta based on Mean MASE is: (0.2, 0.7)
The optimum Alpha and Beta based on Mean WMAPE is: (0.2, 0.6)
The optimum Alpha and Beta based on Mean SMAPE is: (0.1, 0.1)
The optimum Alpha and Beta based on Mean MAPE is: (0.1, 0.1)
The optimum Alpha and Beta based on Mean MAE is: (0.1, 0.4)
The optimum Alpha and Beta based on Mean RMSE is: (0.1, 0.1)


### Get MEDIAN metrics value of each Alpha

In [40]:
df_result_metrics_all_params.groupby(['Alpha', 'Beta'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].median()

Unnamed: 0_level_0,Unnamed: 1_level_0,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
Alpha,Beta,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.1,0.1,0.980561,165.88465,191.99620,84.73875,0.314379,0.448267
0.1,0.2,0.972863,167.14745,192.48740,84.50540,0.306501,0.458820
0.1,0.3,0.972577,166.43595,192.77895,84.39525,0.302338,0.471646
0.1,0.4,0.972974,165.89390,193.07355,84.60595,0.304113,0.484948
0.1,0.5,0.973018,166.44380,193.46640,84.79410,0.301474,0.501492
...,...,...,...,...,...,...,...
0.9,0.5,0.972935,168.80340,193.61940,85.01900,0.300987,0.499448
0.9,0.6,0.972973,169.84050,193.96470,85.22625,0.299566,0.517222
0.9,0.7,0.972973,171.17660,194.53140,86.02840,0.300400,0.534842
0.9,0.8,0.974317,170.45475,194.78620,87.36885,0.299671,0.556785


In [41]:
for mts in list_metrics:
    print('The optimum Alpha and Beta based on Median of', mts, 'is: {}'.format(df_result_metrics_all_params.groupby(['Alpha', 'Beta'])['MASE', 'WMAPE', 'SMAPE', 'MAPE', 'MAE', 'RMSE'].median()[mts].idxmin()))

The optimum Alpha and Beta based on Median of MASE is: (0.4, 0.2)
The optimum Alpha and Beta based on Median of WMAPE is: (0.2, 0.1)
The optimum Alpha and Beta based on Median of SMAPE is: (0.6, 0.1)
The optimum Alpha and Beta based on Median of MAPE is: (0.3, 0.2)
The optimum Alpha and Beta based on Median of MAE is: (0.1, 0.8)
The optimum Alpha and Beta based on Median of RMSE is: (0.3, 0.1)


# Run CrostonTSB model after decide best paramaters

In [42]:
best_alpha = 0.1
best_beta = 0.1

In [43]:
def CrostonTSB_test_data(best_alpha, best_beta, sales_pattern,
                         start_train_date, end_train_date, 
                         n_pred_days, num_train_needed, validation_training_ratio):
    
    sales_pattern_py = sales_pattern.copy()
    list_pattern_py = sales_pattern_py.id.unique().tolist()
    
    sales_pattern_py = sales_pattern_py.drop(['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'], axis=1)
    df_pattern = sales_pattern_py.melt(['id'], var_name='Date').pivot(index = ['Date'], columns = 'id', values = 'value').reset_index()
    df_pattern.drop('Date', axis=1, inplace=True)
    df_pattern.index = pd.date_range('2011-01-29', periods=df_pattern.shape[0], freq="D")
    df_pattern.index.names = ['Date']
    df_pattern = df_pattern.astype('float64')
    
    train_data = df_pattern[(df_pattern.index >= start_train_date) & (df_pattern.index <= end_train_date)]
    test_data = df_pattern.iloc[n_pred_days*-1:]
    
    df_result_best_params = pd.DataFrame()
    for product in list_pattern_py:
        train_product = train_data[product]
        test_product = test_data[product]
        predictions = list()
        history = [x for x in train_product[num_train_needed*-1:]]
        for t in range(len(test_product)):
            yhat = Croston_TSB(history, extra_periods=1, alpha=best_alpha, beta = best_beta)['Forecast'].iloc[-1]
            predictions.append(yhat)
            history.append(test_product[t])
        df_result_temp = pd.DataFrame({'Product': [product for count in range(len(test_product))],
                                        'Actual Data': test_product,
                                        'Forecast': predictions})
        df_result_best_params = df_result_best_params.append(df_result_temp, ignore_index=True)
            
    return df_result_best_params

In [44]:
start = timer()

pattern_df_result_best_params = CrostonTSB_test_data(best_alpha, best_beta, sales_intermittent, start_train_date, end_train_date, n_pred_days, num_train_needed, validation_training_ratio)

end = timer()

print('This line of code took {} minutes'.format((end-start) / 60))

This line of code took 0.8097675214166732 minutes


In [45]:
df_result_final = pattern_df_result_best_params.groupby('Product').apply(get_metrics_result_all_params).reset_index()

In [46]:
df_result_final

Unnamed: 0,Product,MASE,WMAPE,SMAPE,MAPE,MAE,RMSE
0,HOUSEHOLD_1_001_CA_1_validation,1.061912,152.4797,189.6886,85.9169,0.326742,0.436061
1,HOUSEHOLD_1_001_CA_2_validation,0.772459,166.3759,199.9586,99.8550,0.118840,0.273151
2,HOUSEHOLD_1_001_CA_3_validation,0.732277,122.6720,135.5755,35.2764,0.788606,0.964024
3,HOUSEHOLD_1_001_CA_4_validation,inf,inf,200.0000,,0.063443,0.068741
4,HOUSEHOLD_1_002_CA_1_validation,1.218606,196.8517,193.0506,86.0428,0.281217,0.370623
...,...,...,...,...,...,...,...
3485,HOUSEHOLD_2_515_CA_4_validation,,,,,0.000000,0.000000
3486,HOUSEHOLD_2_516_CA_1_validation,1.605843,172.9370,195.1632,90.5282,0.247053,0.368767
3487,HOUSEHOLD_2_516_CA_2_validation,inf,inf,200.0000,,0.151658,0.164321
3488,HOUSEHOLD_2_516_CA_3_validation,2.542754,273.8350,195.7705,82.6246,0.195596,0.268099


In [47]:
df_result_final.to_csv('CrostonTSB_Intermittent_Test_Data.csv')