In [34]:
import scipy.stats as sts
import numpy as np
import pandas as pd
import plotly.express as px
from scipy.integrate import quad
from time import time
from numba import njit


np.random.seed(0)
def timer_func(func):
    # This function shows the execution time of 
    # the function object passed
    def wrap_func(*args, **kwargs):
        t1 = time()
        result = func(*args, **kwargs)
        t2 = time()
        print(f'Function {func.__name__!r} executed in {(t2-t1):.4f}s')
        return result
    return wrap_func

# Estimators
def nonParametricOptimalQuantity(distributionData, tau):
    optimalQuantity = np.quantile(distributionData,tau)
    return optimalQuantity

def parametricOptimalQuantity(tau, dist, parameters):
    optimalQuantity = dist.ppf(tau, *parameters)
    return optimalQuantity

def realParamatricOptimalQuantity(tau, realDist):
    optimalQuantity = realDist.ppf(tau)
    return optimalQuantity

# Optimized code for integrating the normal distribution (very quickly), @njit is compiling the python code to C

@njit
def normal_pdf(x, mean, std_dev):
    coefficient = 1 / (std_dev * np.sqrt(2 * np.pi))
    exponent = -((x - mean) ** 2) / (2 * std_dev ** 2)
    return x * coefficient * np.exp(exponent)

#def normal_pdf(x, mean, std_dev):
#    return x * sts.norm.pdf(x,loc=mean,scale=std_dev)

def integrate_normal(Q_star,parameters):
    return quad(normal_pdf,0,Q_star,args=(parameters[0], parameters[1]))[0]

#################################################################################################################

def calculateOptimalProfit(tau,Q_star,dist, paramaters):
    price = 1
    cost = 1 - tau
    integral_value = integrate_normal(Q_star, paramaters)
    t1 = price * integral_value
    t2 = price * Q_star * dist.sf(Q_star)
    t3 = cost * Q_star
    return t1 + t2 - t3

# Evaluation of test statistics functions
def empericalRootMeanSquaredError(m, optimalQuantities, realOptimalQuantity):
    rmse = np.sqrt(1/m * np.sum((optimalQuantities - realOptimalQuantity)**2))
    return rmse

def empericalRootMeanSquaredErrorRatio(m, parametricOptimalQuantitys, nonParametricOptimalQuantitys, realOptimalQuantity):
    return empericalRootMeanSquaredError(m, nonParametricOptimalQuantitys, realOptimalQuantity) / empericalRootMeanSquaredError(m, parametricOptimalQuantitys, realOptimalQuantity)

def empericalProfitLoss(m, profitEstimator, profitReal):
    return 1 / m * np.sum(np.abs((profitReal - profitEstimator) /  profitReal))

def empericalProfitLossRatio(m, parametricExpectedProfit, nonParametricExpectedProfit, realExpectedProfit):
    return empericalProfitLoss(m, nonParametricExpectedProfit, realExpectedProfit) / empericalProfitLoss(m, parametricExpectedProfit, realExpectedProfit)

def empericalServiceLevel(m, optimalQuantities, demand):
    indicatorFunction = np.where(optimalQuantities.T >= demand.T, 1, 0)
    return 1 / m * np.sum(indicatorFunction)

def fit(feature):
    return np.mean(feature), np.std(feature)

#@timer_func
def monteCarlo(m, tau, n, params):
    '''
    When changing distribution make sure to:
    1. change parmeter array to hold x parameters (the same as fit function)
    2. change dist and distreal
    3. change function that will be used for integration for the optimal profit
    '''

    # Distribution that the paramatric estimator will be using
    dist = sts.norm
    # How the data is really distributed, when changing remember to change the PDF for the precompiled function.
    distReal = sts.norm(loc=params[0], scale=params[1])

    demand = np.empty((m,1))
    realOptimalQuantity = np.empty((m,1))
    parametricOptimalQuantitys = np.empty((m,1))
    nonParametricOptimalQuantitys = np.empty((m,1))
    allParameters = np.empty((m,2))
    parametricExpectedProfits = np.empty((m,1))
    nonParametricExpectedProfits = np.empty((m,1))
    empericalServiceLevelData = np.empty((m,1))

    # Compute estimators and optimal quantities
    for j in range(m):
        distributionData = distReal.rvs(size = n)
        allParameters[j] = parameters = fit(distributionData)
        demand[j] = parameters[0] # in case of a normal distribution
        parametricOptimalQuantitys[j] = parametricOptimalQuantity(tau, dist, parameters)
        nonParametricOptimalQuantitys[j] = nonParametricOptimalQuantity(distributionData, tau)
        empericalServiceLevelData[j] = distributionData[-1]
    
    # We use Numba to compile the python code to C
        parametricExpectedProfits[j] = calculateOptimalProfit(tau, parametricOptimalQuantitys[j], distReal, params)
        nonParametricExpectedProfits[j] = calculateOptimalProfit(tau, nonParametricOptimalQuantitys[j], distReal, params)
    
    realOptimalQuantity = realParamatricOptimalQuantity(tau, distReal)
    realExpectedProfit = calculateOptimalProfit(tau, realOptimalQuantity, distReal, params)

    eplr = empericalProfitLossRatio(m, parametricExpectedProfits, nonParametricExpectedProfits, realExpectedProfit)

    # Compute evaluation statistics
    rmse = empericalRootMeanSquaredErrorRatio(m, parametricOptimalQuantitys, nonParametricOptimalQuantitys, realOptimalQuantity)
    eslParametric = empericalServiceLevel(m, parametricOptimalQuantitys, empericalServiceLevelData)
    eslNonParametric = empericalServiceLevel(m, nonParametricOptimalQuantitys, empericalServiceLevelData)
    
    result = {
                    'MonteCarlo iterations' : m,
                    'Sample Size': n,
                    'Target Service Level': tau,
                    'Param Values': np.mean(allParameters, axis=0),
                    #'real optimal quantity' : realOptimalQuantity,
                    #'Parm optimal quantity': np.mean(parametricOptimalQuantitys),
                    #'nonParm optimal quantity': np.mean(nonParametricOptimalQuantitys),
                    #'real optimal profit' : realExpectedProfit,
                    #'Parm optimal profit': np.mean(parametricExpectedProfits),
                    #'nonParm optimal profit': np.mean(nonParametricExpectedProfits),
                    'RMSE Ratio': rmse,
                    'SL nonParam': eslParametric,
                    'SL Param': eslNonParametric,
                    'EPLR': eplr
                }
    return result

In [40]:
numberOfMontecarloIterations = 1000
stdevDemand = 15
meanDemand = 120

tauArray = [0.01, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99]
nArray = [10, 50, 100, 200]

results = []
for tau in tauArray:
    for n in nArray:
        result = monteCarlo(numberOfMontecarloIterations, tau, n, [meanDemand, stdevDemand])
        results.append(result)


Function 'monteCarlo' executed in 1.1165s
Function 'monteCarlo' executed in 1.0435s
Function 'monteCarlo' executed in 1.0399s
Function 'monteCarlo' executed in 1.0515s
Function 'monteCarlo' executed in 1.0563s
Function 'monteCarlo' executed in 1.0457s
Function 'monteCarlo' executed in 1.0543s
Function 'monteCarlo' executed in 1.0693s
Function 'monteCarlo' executed in 1.0482s
Function 'monteCarlo' executed in 1.0695s
Function 'monteCarlo' executed in 1.0497s
Function 'monteCarlo' executed in 1.0454s
Function 'monteCarlo' executed in 1.0630s
Function 'monteCarlo' executed in 1.0418s
Function 'monteCarlo' executed in 1.0511s
Function 'monteCarlo' executed in 1.0788s
Function 'monteCarlo' executed in 1.0326s
Function 'monteCarlo' executed in 1.0471s
Function 'monteCarlo' executed in 1.0504s
Function 'monteCarlo' executed in 1.0535s
Function 'monteCarlo' executed in 1.0374s
Function 'monteCarlo' executed in 1.0432s
Function 'monteCarlo' executed in 1.0445s
Function 'monteCarlo' executed in 

In [28]:
df = pd.DataFrame(results)
df

Unnamed: 0,MonteCarlo iterations,Sample Size,Target Service Level,Param Values,RMSE Ratio,SL nonParam,SL Param,EPLR
0,1000,10,0.01,"[119.72349419762591, 13.720362743553597]",1.644325,0.003,0.093,4.257576
1,1000,50,0.01,"[120.0140967807625, 14.740757309842326]",1.640041,0.011,0.027,3.872835
2,1000,100,0.01,"[120.14130607280387, 14.858540982582157]",1.871786,0.013,0.01,4.532355
3,1000,200,0.01,"[119.98878184228278, 14.947526742899928]",1.903595,0.015,0.017,4.06087
4,1000,10,0.05,"[120.11252657015035, 13.742808256866427]",1.207038,0.052,0.105,1.737075
5,1000,50,0.05,"[120.13138641676703, 14.761916927166336]",1.303617,0.048,0.051,1.864352
6,1000,100,0.05,"[120.02817687096912, 14.90756341535131]",1.299937,0.044,0.043,1.792269
7,1000,200,0.05,"[120.0361744493198, 14.940970788928059]",1.3605,0.054,0.057,1.864932
8,1000,10,0.1,"[119.83957962745934, 13.861372332356554]",1.184618,0.122,0.118,1.530542
9,1000,50,0.1,"[119.98330792635866, 14.757411232445705]",1.251779,0.105,0.106,1.614967


In [29]:
fig = px.scatter(df, x='Target Service Level', y='RMSE Ratio', color="Sample Size", title="TSL vs RMSE, mean: {0} | stddev: {1} | m = {2}<br><sup>higher RMSE means parametric performs better</sup>".format(meanDemand, stdevDemand, numberOfMontecarloIterations))
fig.update_layout(font_size = 15)
#fig.write_image("Excercise part 3/RMSE-{0}-{1}.pdf".format(meanDemand, stdevDemand))
fig.show()

fig = px.scatter(df, x='Target Service Level', y='EPLR', color="Sample Size", title="TSL vs EPLR, mean: {0} | stddev: {1} | m = {2}<br><sup>higher EPLR means parametric performs better</sup>".format(meanDemand, stdevDemand, numberOfMontecarloIterations))
fig.update_layout(font_size = 15)
#fig.write_image("Excercise part 3/EPLR-{0}-{1}.pdf".format(meanDemand, stdevDemand))
fig.show()

In [30]:
numberOfMontecarloIterations = 1000
stdevDemand = 10
meanDemand = 110

tauArray = [0.01, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99]
nArray = [10, 50, 100, 200]

results = []
for tau in tauArray:
    for n in nArray:
        result = monteCarlo(numberOfMontecarloIterations, tau, n, [meanDemand, stdevDemand])
        results.append(result)


In [31]:
df = pd.DataFrame(results)
df

Unnamed: 0,MonteCarlo iterations,Sample Size,Target Service Level,Param Values,RMSE Ratio,SL nonParam,SL Param,EPLR
0,1000,10,0.01,"[110.02638110554308, 9.161215095216656]",1.612756,0.007,0.088,4.25641
1,1000,50,0.01,"[109.93299887119915, 9.812676258469128]",1.620333,0.01,0.018,3.703326
2,1000,100,0.01,"[109.993965924978, 9.912855319887361]",1.855067,0.012,0.013,4.501664
3,1000,200,0.01,"[110.00876857646654, 9.968495604204055]",1.876973,0.007,0.006,4.083639
4,1000,10,0.05,"[110.08706417314457, 9.291636225686597]",1.181105,0.06,0.108,1.662403
5,1000,50,0.05,"[110.00645029293547, 9.872247489272024]",1.301301,0.048,0.056,1.832646
6,1000,100,0.05,"[110.06806591169742, 9.89409568475385]",1.318215,0.041,0.038,1.826419
7,1000,200,0.05,"[110.02666490602763, 9.940700246545555]",1.303488,0.06,0.056,1.754196
8,1000,10,0.1,"[110.11971308568629, 9.282800811858227]",1.183254,0.119,0.1,1.509112
9,1000,50,0.1,"[110.02437489604264, 9.869522058633544]",1.272098,0.103,0.096,1.686979


In [32]:
fig = px.scatter(df, x='Target Service Level', y='RMSE Ratio', color="Sample Size", title="TSL vs RMSE, mean: {0} | stddev: {1} | m = {2}<br><sup>higher RMSE means parametric performs better</sup>".format(meanDemand, stdevDemand, numberOfMontecarloIterations))
fig.update_layout(font_size = 15)
#fig.write_image("Excercise part 3/RMSE-{0}-{1}.pdf".format(meanDemand, stdevDemand))
fig.show()

fig = px.scatter(df, x='Target Service Level', y='EPLR', color="Sample Size", title="TSL vs EPLR, mean: {0} | stddev: {1} | m = {2}<br><sup>higher EPLR means parametric performs better</sup>".format(meanDemand, stdevDemand, numberOfMontecarloIterations))
fig.update_layout(font_size = 15)
#fig.write_image("Excercise part 3/EPLR-{0}-{1}.pdf".format(meanDemand, stdevDemand))
fig.show()