In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import statsmodels.api as sm
import scipy.stats as sts
from datetime import datetime

np.random.seed(0)

df = pd.read_excel("BakeryData_Vilnius.xlsx")
stores = ["main street A","main street B","station A","station B"]
daysOfTheWeek = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df["weekday_text"] = df['date'].dt.strftime('%A')
df["date"] = pd.to_datetime(df["date"])
df

Unnamed: 0,date,weekday,main street A,main street B,station A,station B,weekday_text
0,2016-05-11,3,2.23,,,,Wednesday
1,2016-05-12,4,18.10,,,,Thursday
2,2016-05-13,5,15.85,,,,Friday
3,2016-05-14,6,14.22,,,,Saturday
4,2016-05-15,7,2.58,,,,Sunday
...,...,...,...,...,...,...,...
2572,2023-05-27,6,168.05,32.34,76.97,114.30,Saturday
2573,2023-05-28,7,44.62,32.85,80.21,91.25,Sunday
2574,2023-05-29,1,64.11,116.84,149.75,92.56,Monday
2575,2023-05-30,2,103.63,134.48,194.03,75.63,Tuesday


In [7]:
begin = datetime(day = 31,month = 12,year = 2016)
pre_covid = datetime(day = 1,month = 3,year = 2021)
after_covid = datetime(day = 1,month = 3,year = 2022)
df = df[((df["date"] < pre_covid) | (df["date"] > after_covid)) & (df["date"] > begin)]

def removeOutliers(df):
    mean = df.mean()
    sd = df.std()
    return df[(df > (mean - 3*sd)) & (df < (mean + 3*sd))]

#print(removeOutliers(df[df["weekday_text"] == "Monday"]["station A"]).mean())
#print(removeOutliers(df[df["weekday_text"] == "Monday"]["station A"]).std())

In [4]:
class Store():
    def __init__(self, name, p, pl, c, cs, dist):
        self.name = name
        self.p = p
        self.pl = pl
        self.c = c
        self.cs = cs
        self.overageCost = self.p - self.c
        self.underageCost = self.c + self.cs - self.pl
        self.serviceLevel = self.overageCost / ( self.overageCost + self.underageCost)
        self.dist = dist
    def fit(self, feature):
        if self.dist == sts.norm:
            return np.mean(feature), np.std(feature, ddof=1)
        if self.dist == sts.lognorm:
            return np.std(np.log(feature),ddof=1),0,np.exp(np.mean(np.log(feature)))
        else:
            raise Exception("MLE not found for given dist")
    def feature(self, day):
        feature = df[df["weekday_text"] == day][self.name]
        feature = feature[~np.isnan(feature)]
        return removeOutliers(feature)
    def bootstrap(self, day, m, alpha):
        feature = self.feature(day)
        params = self.fit(feature)
        vQ_hat = np.zeros(m)
        n = len(feature)
        for i in range(m):
            generatedData = self.dist.rvs(*params, size=n)
            btParams = self.fit(generatedData)
            vQ_hat[i] = self.dist.ppf(self.serviceLevel, *btParams)
        return np.quantile(vQ_hat, [alpha/2, 1-alpha/2])
    def nonParametricOptimalInterval(self, day, alpha):
        feature = self.feature(day)
        criticalAmount = feature.quantile(self.serviceLevel)

        # Find interval
        z = 1 / sts.norm.ppf(1-alpha/2)
        feature = np.sort(feature)
        n = len(feature)
        deviation = (z * np.sqrt(n*self.serviceLevel * (1-self.serviceLevel)))
        upperBound = int(n*self.serviceLevel + deviation)
        lowerBound = int(n*self.serviceLevel - deviation)
        if upperBound > n:
            upperBound = n
        if lowerBound < 1:
            lowerBound = 1
        minimumAmount, maximumAmount  = feature[lowerBound - 1], feature[upperBound - 1]
        return {'store':self.name,'day':day,'service-level': self.serviceLevel,'method' : "nonParametric",'optimalQuantity': criticalAmount, 'lower bound': minimumAmount, 'upper bound': maximumAmount}
    def parametricOptimalInterval(self, day, alpha, m=100):
        feature = self.feature(day)
        params = self.fit(feature)
        optimalQuantity = self.dist.ppf(self.serviceLevel, *params)
        minimumAmount, maximumAmount = self.bootstrap(day,m,alpha)
        
        return {'store':self.name,'day':day,'service-level': self.serviceLevel,'method' : "Parametric",'optimalQuantity': optimalQuantity, 'lower bound': minimumAmount, 'upper bound': maximumAmount}

# Init the stores class with the stores we will be using
mainstreetA = Store("main street A", 4.64, 0.15, 3.85, 0.11,  sts.lognorm)
stationA = Store("station A", 4.64, 0.15, 4.16, 0.08, sts.norm)
#mainstreetB = Store("main street A", 4.64, 0.15, 3.42, 0.08)
#stationB = Store("station B", 4.64, 0.15, 3.32, 0.09)
Stores = [mainstreetA, stationA]

# m = 10000 takes about 70 second to run on our hardware
m = 10000
output = []
for store in Stores:
    for day in daysOfTheWeek:
        output.append(store.nonParametricOptimalInterval(day,0.05))
        output.append(store.parametricOptimalInterval(day,0.05,m))

outputdf = pd.DataFrame(output)
outputdf["length"] = np.abs(outputdf["lower bound"]-outputdf["upper bound"])
outputdf

Unnamed: 0,store,day,service-level,method,optimalQuantity,lower bound,upper bound,length
0,main street A,Monday,0.171739,nonParametric,40.953217,40.54,41.3,0.76
1,main street A,Monday,0.171739,Parametric,40.305496,38.631651,42.048634,3.416983
2,main street A,Tuesday,0.171739,nonParametric,40.690043,39.96,40.82,0.86
3,main street A,Tuesday,0.171739,Parametric,40.802241,39.138604,42.571037,3.432433
4,main street A,Wednesday,0.171739,nonParametric,41.398304,40.28,41.86,1.58
5,main street A,Wednesday,0.171739,Parametric,40.796702,39.126059,42.606189,3.48013
6,main street A,Thursday,0.171739,nonParametric,42.58,42.01,42.9,0.89
7,main street A,Thursday,0.171739,Parametric,41.383261,39.690632,43.211728,3.521096
8,main street A,Friday,0.171739,nonParametric,93.915761,93.69,94.04,0.35
9,main street A,Friday,0.171739,Parametric,93.983035,93.310991,94.657631,1.34664
