In [2]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

Tasks 
- add analytical result for comparison with methods
- for exercises (5) to (9), add comments and verify the results
- Insert picture of calculations

We define a function to generate a confidence interval given a vector of sampled values

In [3]:
def getCI(vals, CL = 0.95):
    """Return the sample mean with a two-sided Student-t confidence interval.

    Parameters
    ----------
    vals : sequence or 1-D array
        Sampled values. At least two observations are needed for a finite
        interval; with fewer, the bounds come out as NaN.
    CL : float, optional
        Confidence level, strictly between 0 and 1 (default 0.95).

    Returns
    -------
    tuple
        ``(mean, lower, upper)``: the sample mean and the interval bounds.

    Raises
    ------
    ValueError
        If ``CL`` is not strictly between 0 and 1.
    """
    if not 0 < CL < 1:
        raise ValueError('CL must lie strictly between 0 and 1')
    n = len(vals)
    DF = n - 1  # degrees of freedom of the t distribution
    # Two-sided critical value: |t quantile| at the lower tail (1 - CL) / 2.
    t_crit = np.abs(stats.t.ppf((1 - CL) / 2, DF))
    mean = np.mean(vals)
    std = np.std(vals, ddof = 1)  # unbiased sample standard deviation
    half_width = t_crit * std / np.sqrt(n)
    return mean, mean - half_width, mean + half_width

<h3> (1) Estimate integral using crude Monte Carlo </h3>

We sample 100 values from the uniform distribution and apply the crude method to get the estimate of the integral.

In [4]:
# Crude Monte Carlo: average exp(U) over draws U ~ Uniform(0, 1).
us = np.random.uniform(size = 100)
exp = np.exp(us)
mean, l, u = getCI(exp)
print('The variance of the estimation is: {:.4f}'.format(exp.var()))
print('The estimate of the integral is: {:.4f}'.format(mean))
print('With the following confidence interval: {:.4f}, {:.4f}'.format(l,u))

The variance of the estimation is: 0.2321
The estimate of the integral is: 1.6403
With the following confidence interval: 1.5443, 1.7364


<h3> (2) Estimate integral using antithetic variables </h3>

We sample 100 values from the uniform distribution and we estimate the integral using antithetic variables

In [5]:
# Antithetic variables: pair exp(U) with exp(1 - U) = e / exp(U) and average the pair.
us = np.random.uniform(size = 100)
exp = np.exp(us)
y = 0.5*(exp + np.e/exp)

mean, l, u = getCI(y)
print('The variance of the estimation is: {:.4f}'.format(y.var()))
print('The estimate of the integral is: {:.4f}'.format(mean))
print('With the following confidence interval: {:.4f}, {:.4f}'.format(l,u))

The variance of the estimation is: 0.0042
The estimate of the integral is: 1.7209
With the following confidence interval: 1.7079, 1.7338


Note that the variance is reduced a lot with respect to the crude method

<h3> (3) Estimate integral using control variates </h3>

In [6]:
# Control variates: U itself is the control, with known mean E[U] = 1/2.
us = np.random.uniform(0,1, size = 100)
exp = np.exp(us)
# Estimate c = -Cov(exp(U), U) / Var(U). Both moments are read from the same
# covariance matrix so they share one normalisation (np.cov uses n-1, whereas
# np.var defaults to n; mixing the two scales c by n/(n-1)).
cov_matrix = np.cov(exp, us)
c = -cov_matrix[0,1]/cov_matrix[1,1]
z = exp + c*(us - 1/2)
print('The variance of the estimation is: {:.4f}'.format(np.var(z)))
mean, l, u = getCI(z)
print('The estimate of the integral is: {:.4f}'.format(mean))
print('With the following confidence interval: {:.4f}, {:.4f}'.format(l,u))

The variance of the estimation is: 0.0030
The estimate of the integral is: 1.7118
With the following confidence interval: 1.7009, 1.7228


In this case, too, the variance is reduced with respect to the crude method

<h3> (4) Estimate integral using stratified sampling </h3>

We use stratified sampling with 10 strata to get the estimate of the integral.

In [7]:
# Ten independent stratified estimates of the integral are collected in w.
w = []
for rep in range(10):
    # One uniform draw per stratum; draw j is mapped into [j/10, (j+1)/10).
    us = np.random.uniform(0,1,10)
    stratum_values = [np.exp((j + u_j)/10) for j, u_j in enumerate(us)]
    w.append(np.sum(stratum_values)/10)

mean, l, u = getCI(w)
print('The variance of the estimation is: {:.4f}'.format(np.var(w)))
print('The estimate of the integral is: {:.4f}'.format(mean))
print('With the following confidence interval: {:.4f}, {:.4f}'.format(l,u))

The variance of the estimation is: 0.0006
The estimate of the integral is: 1.7258
With the following confidence interval: 1.7076, 1.7441


<h3> (5) Control variates for blocking queueing system simulation </h3>

The control variate is the mean inter-arrival time observed in each run; its expected value is known (it equals `mean_tbc`).

In [8]:
def simulate_queue(nserver, customers, mean_st, mean_tbc):
    """Simulate a blocking system with exponential inter-arrival and service times.

    A customer who arrives while every server is still busy is blocked (lost);
    otherwise the customer is assigned to the server that frees up earliest.

    Parameters
    ----------
    nserver : int
        Number of servers.
    customers : int
        Number of arriving customers to simulate.
    mean_st : float
        Mean service time (scale of the exponential service distribution).
    mean_tbc : float
        Mean time between customers (scale of the exponential inter-arrival
        distribution).

    Returns
    -------
    tuple
        ``(blocked fraction, mean sampled inter-arrival time)`` for the run.
    """
    server_time = np.zeros(nserver)  # time at which each server becomes free
    time = 0
    blocked = 0
    for _ in range(customers):
        time += stats.expon.rvs(scale = mean_tbc, size = 1)[0]
        # The earliest-free server decides whether the arrival can be served.
        idx_min_server = np.argmin(server_time)
        if time < server_time[idx_min_server]:
            blocked += 1
        else:
            # Take the scalar out of the size-1 sample before storing it;
            # assigning a 1-element array to a single slot is deprecated in NumPy.
            delta_service_time = stats.expon.rvs(scale = mean_st, size = 1)[0]
            server_time[idx_min_server] = time + delta_service_time

    # The clock equals the sum of all inter-arrival times, so time/customers
    # is the mean sampled inter-arrival time.
    return blocked/customers, time/customers

In [9]:
nserver = 10
mean_st = 8
mean_tbc = 1
customers = 10000
nsim = 10

# One row per run: (blocked fraction, mean sampled inter-arrival time).
sims = np.array([simulate_queue(nserver, customers, mean_st, mean_tbc) for _ in range(nsim)])
runs = sims[:,0]
arrivals = sims[:,1]

# Estimate c = -Cov(runs, arrivals) / Var(arrivals). Both moments are taken
# from the same covariance matrix so they share one normalisation; combining
# np.cov (n-1) with np.var (n) would scale c by nsim/(nsim-1).
cov_matrix = np.cov(runs, arrivals)
c = -cov_matrix[0,1]/cov_matrix[1,1]
# The control variate is centred on its known expectation mean_tbc.
z = runs + c*(arrivals - mean_tbc)
print(getCI(z))
print(getCI(runs))
print(np.cov(runs), np.cov(z))

(0.12357379105435233, 0.12179769757319586, 0.1253498845355088)
(0.12391, 0.11938588047557853, 0.12843411952442146)
3.999655555555557e-05 6.164333011813146e-06


The variance is indeed reduced

<h3> (6) Common random numbers in queueing system simulation </h3>

In [10]:
def getExp(lam, us):
    """Turn uniform draws ``us`` into exponential draws with rate ``lam`` by inversion."""
    return -np.log(us)/lam

def getHyperExp(p, lam1, lam2, u1, u2):
    """Sample a two-branch hyper-exponential distribution from uniform draws.

    ``u2`` picks the branch for each element (rate ``lam1`` where ``u2 <= p``,
    rate ``lam2`` where ``u2 > p``) and ``u1`` is turned into the exponential
    value for the chosen branch through ``getExp``.
    """
    use_first = u2 <= p
    use_second = u2 > p
    res = np.zeros(len(u1))
    res[use_first] = getExp(lam = lam1, us = u1[use_first])
    res[use_second] = getExp(lam = lam2, us = u1[use_second])
    return res

def simulate_queue_q2(nserver, customers, mean_st, mean_tbc, type = 'Exp', seed = 0):
    """Simulate the blocking system with a seeded arrival stream (common random numbers).

    The global NumPy generator is re-seeded with ``seed`` and the uniforms that
    drive the arrival process are drawn up front, so calls with the same seed
    but a different ``type`` start from identical uniform streams.

    Parameters
    ----------
    nserver : int
        Number of servers.
    customers : int
        Number of arriving customers to simulate.
    mean_st : float
        Mean of the exponential service time.
    mean_tbc : float
        Mean time between customers; used by the ``'Exp'`` arrival process.
    type : {'Exp', 'Hyp'}
        Inter-arrival distribution. ``'Hyp'`` uses a fixed hyper-exponential
        (p = 0.8, rates 0.8333 and 5, i.e. mean of about 1) and does not read
        ``mean_tbc``.
    seed : int
        Seed passed to ``np.random.seed`` (side effect on the global generator).

    Returns
    -------
    float
        Fraction of blocked customers.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'Exp'`` nor ``'Hyp'``.
    """
    np.random.seed(seed)
    u1 = np.random.uniform(0,1, customers)
    u2 = np.random.uniform(0,1, customers)
    server_time = np.zeros(nserver)  # time at which each server becomes free
    time = 0
    blocked = 0
    if type == 'Exp':
        # getExp expects a rate, so convert the mean inter-arrival time.
        arrival_times = getExp(lam = 1/mean_tbc, us = u1)
    elif type == 'Hyp':
        arrival_times = getHyperExp(0.8, 0.8333, 5, u1, u2)
    else:
        raise ValueError("type must be 'Exp' or 'Hyp', got {!r}".format(type))
    for delta_arrival_time in arrival_times:
        time += delta_arrival_time
        idx_min_server = np.argmin(server_time)
        if time < server_time[idx_min_server]:
            blocked += 1
        else:
            # Take the scalar out of the size-1 sample before storing it.
            server_time[idx_min_server] = time + stats.expon.rvs(scale = mean_st, size = 1)[0]

    return blocked/customers

nserver = 10
mean_st = 8
mean_tbc = 1
customers = 10000
nsim = 10

# Each row pairs a hyper-exponential and an exponential run that share a seed
# (common random numbers); nsim controls the number of paired runs.
runs = np.array([
    [simulate_queue_q2(nserver, customers, mean_st, mean_tbc, 'Hyp', seed),
     simulate_queue_q2(nserver, customers, mean_st, mean_tbc, 'Exp', seed)]
    for seed in range(nsim)
])

# Paired t-test from scipy next to the same statistic computed by hand.
diff = runs[:,0] - runs[:,1]
stats.ttest_rel(runs[:,0], runs[:,1]), np.mean(diff)*np.sqrt(nsim)/np.std(diff, ddof = 1)

(TtestResult(statistic=16.68412262034125, pvalue=4.462657602263568e-08, df=9),
 16.684122620341252)

<h3> (7) Monte Carlo on standard normal random variable </h3>

Crude method

In [20]:
# Crude estimate of P(Z > a) for a standard normal Z: the fraction of draws above a.
a = 2
tot = 1000000
values = np.random.randn(tot) > a  # boolean indicator per draw
res = values.mean()
print(getCI(values))
# Compare with the exact tail probability.
print(res, 1 - stats.norm.cdf(a))

(0.022726, 0.022433908776859402, 0.023018091223140597)
0.022726 0.02275013194817921


Importance sampling

Importance sampling reduces the number of samples required to estimate the probability

In [30]:
# Importance sampling for P(Z > a): draw from a normal centred at a and
# reweight each draw by the density ratio f / g.
a = 4
s = 1
tot = 10000

samples = stats.norm.rvs(loc = a, scale = s, size = tot)

h = samples > a                                  # indicator of the tail event
f = stats.norm.pdf(samples)                      # target density N(0, 1)
g = stats.norm.pdf(samples, loc = a, scale = s)  # sampling density N(a, s)

Z = (h * f) / g
res = Z.mean()

# A confidence interval for the estimate can be obtained with getCI(Z).
print(res, 1 - stats.norm.cdf(a))

3.1981058435350963e-05 3.167124183311998e-05


<h3> (8) Exponential importance sampling </h3>

$$
\begin{array}{l}
g(x):=\lambda \cdot \mathrm{e}^{-\lambda \cdot x}=x \rightarrow \lambda \mathrm{e}^{-\lambda x} \\
h(x):=\mathrm{e}^x=x \rightarrow \mathrm{e}^x \\
\operatorname{simplify}\left(\frac{h(x)}{g(x)}\right) \cdot f=\frac{\mathrm{e}^{x(\lambda+1)} f}{\lambda} \\
E Z:=\int_0^1 \frac{\mathrm{e}^{x(\lambda+1)}}{\lambda} \cdot g(x) \mathrm{d} x=-1+\mathrm{e} \\
E Z 2:=\int_0^1\left(\frac{\mathrm{e}^{x(\lambda+1)}}{\lambda}\right)^2 \cdot g(x) \mathrm{d} x=\frac{\mathrm{e}^{\lambda+2}-1}{(\lambda+2) \lambda} \\
\operatorname{Var}(\lambda):=\text { evalf }\left(E Z 2-E Z^2\right): \\
\text { fsolve }(\operatorname{diff}(\operatorname{Var}(\lambda), \lambda)=0, \lambda, 0..5) \\
\end{array}
$$
$$
\lambda = 1.354829
$$


In [35]:
# Importance sampling for the integral of exp(x) over [0, 1], drawing from an
# exponential density with rate lam.
lam = 1.35483
size = 100000
values = stats.expon.rvs(scale = 1/lam, size = size)

f = (values >= 0) & (values <= 1)  # indicator of the integration range [0, 1]
h = np.exp(values)                 # integrand
g = lam*np.exp(-lam*values)        # sampling density

res = (f * h) / g
np.var(res), np.mean(res)

# The sample mean agrees with the analytical solution (derived on paper).


(3.1349436524236687, 1.71996476377874)

<h3> (9) Pareto IS estimator </h3>

In [14]:
# Importance sampling estimate of the mean of a Pareto(k) variable, drawing
# from a Pareto with shape k - 1.
k = 1.05
size = 10000
values = stats.pareto.rvs(k-1, size = size)

h = values                          # quantity whose mean is estimated
f = stats.pareto.pdf(values, k)     # target density
g = stats.pareto.pdf(values, k-1)   # sampling density

res = (h * f) / g
np.mean(res), k/(k-1)

(21.000000000000075, 20.999999999999982)