In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta

In [12]:
def read_data(source, max_rows=2322):
    """Load the payment CSV and index it chronologically by payment date.

    Parameters
    ----------
    source : str, path object or file-like
        Anything accepted by ``pandas.read_csv``. The data must contain a
        ``pay_date`` column that ``pandas.to_datetime`` can parse.
    max_rows : int or None, default 2322
        Keep only the first ``max_rows`` rows (in file order) before
        indexing; ``None`` keeps every row. 2322 is the cut-off this notebook
        uses; the reason for that particular value is not recorded here.

    Returns
    -------
    pandas.DataFrame
        The retained rows with an index named ``Date`` built from
        ``pay_date`` and sorted in ascending order. The ``pay_date`` column
        itself is kept.
    """
    raw = pd.read_csv(source)

    # Copy the slice so the index assignment below never writes into a view of `raw`.
    df = raw.iloc[:max_rows].copy()
    df.index = pd.to_datetime(df['pay_date']).rename('Date')

    # sort_index() returns a new frame rather than sorting in place, so the
    # result must be kept. mergesort is stable: rows that share a date stay
    # in their original file order.
    return df.sort_index(kind='mergesort')

# Load the amount series once; `df` (indexed by payment date) is the frame the later cells slice with `df.loc[...]`.
df = read_data("daily_67_amount.csv")

In [13]:
def Croston(ts,extra_periods=1,alpha=0.4):
    """Croston-style forecast: smoothed demand level divided by smoothed demand interval.

    Parameters
    ----------
    ts : array-like
        Historical demand, one value per period. A period counts as a demand
        occurrence only when its value is strictly greater than zero.
    extra_periods : int, default 1
        Number of future periods appended after the history.
    alpha : float, default 0.4
        Smoothing weight given to the newest observation, used for both the
        level and the interval.

    Returns
    -------
    pandas.DataFrame
        ``len(ts) + extra_periods`` rows with columns ``Demand``,
        ``Forecast``, ``Period``, ``Level`` and ``Error``
        (``Demand - Forecast``). The appended future rows have NaN ``Demand``
        and ``Error`` and all share the same forecast value.
    """
    demand = np.array(ts)
    n_hist = len(demand)  # number of historical periods
    # Pad with NaN so the future periods have rows of their own.
    demand = np.append(demand, [np.nan]*extra_periods)
    n_total = n_hist + extra_periods

    level = np.full(n_total, np.nan)     # smoothed size of non-zero demand
    period = np.full(n_total, np.nan)    # smoothed number of periods between demands
    forecast = np.full(n_total, np.nan)  # level / period
    since_last = 1                       # periods elapsed since the last demand

    # Seed the state from the first strictly positive demand in the history.
    first_hit = np.argmax(demand[:n_hist] > 0)
    level[0] = demand[first_hit]
    period[0] = 1 + first_hit
    forecast[0] = level[0]/period[0]

    # Each historical period t produces the state used to forecast period t+1.
    for t in range(n_hist):
        nxt = t + 1
        if not (demand[t] > 0):
            # No demand (or NaN): carry the state forward unchanged.
            level[nxt] = level[t]
            period[nxt] = period[t]
            forecast[nxt] = forecast[t]
            since_last += 1
            continue
        level[nxt] = alpha*demand[t] + (1-alpha)*level[t]
        period[nxt] = alpha*since_last + (1-alpha)*period[t]
        forecast[nxt] = level[nxt]/period[nxt]
        since_last = 1

    # Row n_hist was written by the loop; repeat it over the remaining future rows.
    level[n_hist+1:] = level[n_hist]
    period[n_hist+1:] = period[n_hist]
    forecast[n_hist+1:] = forecast[n_hist]

    return pd.DataFrame({
        "Demand": demand,
        "Forecast": forecast,
        "Period": period,
        "Level": level,
        "Error": demand - forecast,
    })

In [18]:
pred_length = 4
datelist = pd.date_range(
    datetime.strptime("2021-01-05", "%Y-%m-%d"),
    datetime.strptime("2021-03-22", "%Y-%m-%d"),
).tolist()

# One list per output column; 'begin Date' is filled only by the first window of each week.
x = {
    column: []
    for column in ('begin Date',
                   'Monday', 'Mon_true',
                   'Wednesday', 'Wed_true',
                   'Friday', 'Fri_true')
}

# dayofweek of the forecast origin ->
#   (forecast column, actuals column, slice of the forecast tail to sum,
#    first and last day offset of the matching actuals, both inclusive).
# The last `pred_length` forecast rows are the future periods, so e.g.
# slice(-3, -1) picks the 2nd and 3rd period ahead, matching offsets +2..+3
# (assuming `df` has exactly one row per calendar day — TODO confirm).
# NOTE(review): pandas dayofweek is 0=Monday, so 1/3/5 are Tuesday/Thursday/
# Saturday; the Monday/Wednesday/Friday labels presumably name something other
# than the origin day — confirm.
windows = {
    1: ('Monday', 'Mon_true', slice(-3, -1), 2, 3),
    3: ('Wednesday', 'Wed_true', slice(-3, None), 2, 4),
    5: ('Friday', 'Fri_true', slice(-2, None), 3, 4),
}

for i in datelist:
    window = windows.get(i.dayofweek)
    if window is None:
        continue
    pred_col, true_col, horizon, first_offset, last_offset = window

    # Fit on everything up to and including the origin day.
    input_data = df.loc[:i]
    yhat = Croston(input_data["amount"], pred_length)['Forecast']
    predict = yhat[horizon].sum()
    true = df.loc[i + timedelta(days=first_offset): i + timedelta(days=last_offset)]

    if i.dayofweek == 1:
        x['begin Date'].append(i)
    x[pred_col].append(int(predict))  # int() truncates the forecast sum toward zero
    x[true_col].append(sum(true['amount']))

In [19]:
# One row per week: every list in `x` must have the same length for this to succeed.
result = pd.DataFrame(x)

In [20]:
import math

# The three forecast windows and their matching actuals.
forecast_cols = ('Monday', 'Wednesday', 'Friday')
actual_cols = ('Mon_true', 'Wed_true', 'Fri_true')

error = []  # per-week (forecast - actual) / forecast
mse = 0     # running sum of squared weekly errors; averaged when it is reported
for week in range(len(result)):
    forecast_total = sum(result.loc[week, col] for col in forecast_cols)
    actual_total = sum(result.loc[week, col] for col in actual_cols)
    gap = forecast_total - actual_total
    relative_gap = gap / forecast_total
    mse += gap ** 2
    error.append(relative_gap)

In [21]:
# `mse` holds the summed squared weekly errors; dividing by the week count gives the mean.
mean_squared_error = mse / len(result)
result['error'] = error
print("MSE = ", mean_squared_error)
result

MSE =  109.72727272727273


Unnamed: 0,begin Date,Monday,Mon_true,Wednesday,Wed_true,Friday,Fri_true,error
0,2021-01-05,3,3,9,0,6,0,0.833333
1,2021-01-12,6,13,6,0,3,0,0.133333
2,2021-01-19,3,1,5,6,1,9,-0.777778
3,2021-01-26,2,8,8,13,6,4,-0.5625
4,2021-02-02,8,9,8,4,8,11,0.0
5,2021-02-09,9,0,14,0,9,5,0.84375
6,2021-02-16,9,6,5,5,4,12,-0.277778
7,2021-02-23,8,3,8,0,4,9,0.4
8,2021-03-02,3,1,5,3,3,5,0.181818
9,2021-03-09,3,10,6,2,5,1,0.071429
