In [48]:
import scipy.stats as sts
import numpy as np
import pandas as pd
import plotly.express as px
from scipy.integrate import quad
from time import time
from numba import njit


np.random.seed(0)
def timer_func(func):
    # This function shows the execution time of 
    # the function object passed
    def wrap_func(*args, **kwargs):
        t1 = time()
        result = func(*args, **kwargs)
        t2 = time()
        print(f'Function {func.__name__!r} executed in {(t2-t1):.4f}s')
        return result
    return wrap_func

# Estimators
def nonParametricOptimalQuantity(distributionData, tau):
    optimalQuantity = np.quantile(distributionData,tau)
    return optimalQuantity

def parametricOptimalQuantity(tau, dist, parameters):
    optimalQuantity = dist.ppf(tau, *parameters)
    return optimalQuantity

def realParamatricOptimalQuantity(tau, realDist):
    optimalQuantity = realDist.ppf(tau)
    return optimalQuantity

# Optimized code for integrating the lognormal distribution (very quickly), @njit is compiling the python code to C

@njit
def lognormal_pdf(x, shape, location, scale):
    mu = np.log(scale)
    sigma = shape
    if x <= 0:
        return 0
    exponent = -(np.log(x) - mu)**2 / (2 * sigma**2)
    coefficient = 1 / (x * sigma * np.sqrt(2 * np.pi))
    return x * coefficient * np.exp(exponent)

def integrate_lognormal(Q_star,parameters):
    return quad(lognormal_pdf,0,Q_star,args=(parameters[0], parameters[1],parameters[2]))[0]

#####################################################################################################################

def calculateOptimalProfit(tau,Q_star,dist, paramaters):
    price = 1
    cost = 1 - tau

    integral_value = integrate_lognormal(Q_star, paramaters)
    t1 = price * integral_value
    t2 = price * Q_star * dist.sf(Q_star)
    t3 = cost * Q_star
    return t1 + t2 - t3

# Evaluation of test statistics functions
def empericalRootMeanSquaredError(m, optimalQuantities, realOptimalQuantity):
    rmse = np.sqrt(1/m * np.sum((optimalQuantities - realOptimalQuantity)**2))
    return rmse

def empericalRootMeanSquaredErrorRatio(m, parametricOptimalQuantitys, nonParametricOptimalQuantitys, realOptimalQuantity):
    return empericalRootMeanSquaredError(m, nonParametricOptimalQuantitys, realOptimalQuantity) / empericalRootMeanSquaredError(m, parametricOptimalQuantitys, realOptimalQuantity)

def empericalProfitLoss(m, profitEstimator, profitReal):
    return 1 / m * np.sum(np.abs((profitReal - profitEstimator) /  profitReal))

def empericalProfitLossRatio(m, parametricExpectedProfit, nonParametricExpectedProfit, realExpectedProfit):
    return empericalProfitLoss(m, nonParametricExpectedProfit, realExpectedProfit) / empericalProfitLoss(m, parametricExpectedProfit, realExpectedProfit)

def empericalServiceLevel(m, optimalQuantities, demand):
    indicatorFunction = np.where(optimalQuantities.T >= demand.T, 1, 0)
    return 1 / m * np.sum(indicatorFunction)

#@timer_func
def monteCarlo(m, tau, n, params):
    '''
    When changing distribution make sure to:
    1. change parmeter array to hold x parameters (the same as fit function)
    2. change dist and distreal
    3. change function that will be used for integration
    '''
    
    # Distribution that the paramatric estimator will be using
    dist = sts.lognorm
    # How the data is really distributed, when changing remember to change the PDF for the precompiled function.
    distReal = sts.lognorm(*params)

    demand = np.empty((m,1))
    realOptimalQuantity = np.empty((m,1))
    parametricOptimalQuantitys = np.empty((m,1))
    nonParametricOptimalQuantitys = np.empty((m,1))
    allParameters = np.empty((m,2))
    parametricExpectedProfits = np.empty((m,1))
    nonParametricExpectedProfits = np.empty((m,1))
    empericalServiceLevelData = np.empty((m,1))

    # change param array to contain the right amount of paramaters when changing distribution
    allParameters = np.empty((m,3))

    # Compute estimators and optimal quantities
    for j in range(m):
        distributionData = distReal.rvs(size = n)
        # make sure to change the fit function for different distributions
        allParameters[j] = parameters = dist.fit(distributionData, floc=0)
        demand[j] = parameters[0] # in case of a normal distribution
        parametricOptimalQuantitys[j] = parametricOptimalQuantity(tau, dist, parameters)
        nonParametricOptimalQuantitys[j] = nonParametricOptimalQuantity(distributionData, tau)
        empericalServiceLevelData[j] = distributionData[-1]
    
    # We use Numba to compile the python code to C
        parametricExpectedProfits[j] = calculateOptimalProfit(tau, parametricOptimalQuantitys[j], distReal, params)
        nonParametricExpectedProfits[j] = calculateOptimalProfit(tau, nonParametricOptimalQuantitys[j], distReal, params)
    
    realOptimalQuantity = realParamatricOptimalQuantity(tau, distReal)
    realExpectedProfit = calculateOptimalProfit(tau, realOptimalQuantity, distReal, params)

    eplr = empericalProfitLossRatio(m, parametricExpectedProfits, nonParametricExpectedProfits, realExpectedProfit)

    # Compute evaluation statistics
    rmse = empericalRootMeanSquaredErrorRatio(m, parametricOptimalQuantitys, nonParametricOptimalQuantitys, realOptimalQuantity)
    eslParametric = empericalServiceLevel(m, parametricOptimalQuantitys, empericalServiceLevelData)
    eslNonParametric = empericalServiceLevel(m, nonParametricOptimalQuantitys, empericalServiceLevelData)
    
    result = {
                    'MonteCarlo iterations' : m,
                    'Sample Size': n,
                    'Target Service Level': tau,
                    'Param Values': np.mean(allParameters, axis=0),
                    #'real optimal quantity' : realOptimalQuantity,
                    #'Parm optimal quantity': np.mean(parametricOptimalQuantitys),
                    #'nonParm optimal quantity': np.mean(nonParametricOptimalQuantitys),
                    #'real optimal profit' : realExpectedProfit,
                    #'Parm optimal profit': np.mean(parametricExpectedProfits),
                    #'nonParm optimal profit': np.mean(nonParametricExpectedProfits),
                    'RMSE Ratio': rmse,
                    'SL nonParam': eslParametric,
                    'SL Param': eslNonParametric,
                    'EPLR': eplr
                }
    return result

In [49]:
numberOfMontecarloIterations = 1000
sigma = 0.6
mu = 6

tauArray = [0.01, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95, 0.99]
nArray = [10, 50, 100, 200]

results = []
for tau in tauArray:
    for n in nArray:
        result = monteCarlo(numberOfMontecarloIterations, tau, n, [sigma, 0, np.exp(mu)])
        results.append(result)


In [54]:
df = pd.DataFrame(results)
df

Unnamed: 0,MonteCarlo iterations,Sample Size,Target Service Level,Param Values,RMSE Ratio,SL nonParam,SL Param,EPLR
0,1000,10,0.01,"[0.5488145097421433, 0.0, 405.6534371550081]",2.036058,0.003,0.093,5.306869
1,1000,50,0.01,"[0.5896302923936928, 0.0, 405.254013787701]",1.917783,0.011,0.027,4.856302
2,1000,100,0.01,"[0.594341639303287, 0.0, 406.40406557639267]",2.070199,0.013,0.01,5.252255
3,1000,200,0.01,"[0.5979010697159988, 0.0, 403.56979040305015]",1.984877,0.015,0.017,4.388735
4,1000,10,0.05,"[0.5497123302746572, 0.0, 412.68547490578794]",1.376578,0.052,0.105,2.039442
5,1000,50,0.05,"[0.590476677086653, 0.0, 407.0828507118977]",1.379938,0.048,0.051,2.029532
6,1000,100,0.05,"[0.5963025366140522, 0.0, 404.61337325726214]",1.345436,0.044,0.043,1.885966
7,1000,200,0.05,"[0.5976388315571219, 0.0, 404.3851312382248]",1.367413,0.054,0.057,1.887126
8,1000,10,0.1,"[0.5544548932942619, 0.0, 408.2772906202946]",1.303247,0.122,0.118,1.754797
9,1000,50,0.1,"[0.590296449297828, 0.0, 404.5664120373448]",1.28632,0.105,0.106,1.685652


In [53]:
fig = px.scatter(df, x='Target Service Level', y='RMSE Ratio', color="Sample Size", title="TSL vs RMSE, Lognormal mu: {0} | sigma: {1} | m = {2}<br><sup>higher RMSE means parametric performs better</sup>".format(mu, sigma, numberOfMontecarloIterations))
fig.update_layout(font_size = 15)
#fig.write_image("Excercise part 3/RMSE-lognorm-{0}-{1}.pdf".format(mu, sigma))
fig.show()

fig = px.scatter(df, x='Target Service Level', y='EPLR', color="Sample Size", title="TSL vs EPLR, mu: {0} | sigma: {1} | m = {2}<br><sup>higher EPLR means parametric performs better</sup>".format(mu, sigma, numberOfMontecarloIterations))
fig.update_layout(font_size = 15)
#fig.write_image("Excercise part 3/EPLR-lognorm-{0}-{1}.pdf".format(mu, sigma))
fig.show()