In [2]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

In [3]:
def getCI(vals, CL=0.95):
    """Return the sample mean with a two-sided Student-t confidence interval.

    Parameters
    ----------
    vals : array-like
        Sample of observations (numeric or boolean); at least two are needed
        because the spread is estimated with ``ddof=1``.
    CL : float, optional
        Confidence level, strictly between 0 and 1. Defaults to 0.95.

    Returns
    -------
    tuple
        ``(mean, upper, lower)`` -- note that the upper bound comes first.

    Raises
    ------
    ValueError
        If fewer than two observations are supplied or ``CL`` is not in (0, 1).
    """
    sample = np.asarray(vals, dtype=float)
    n = sample.size
    if n < 2:
        raise ValueError("getCI needs at least two observations")
    if not 0 < CL < 1:
        raise ValueError("CL must lie strictly between 0 and 1")

    DF = n - 1  # degrees of freedom
    # Lower-tail quantile is negative; its absolute value is the critical value.
    t_crit = np.abs(stats.t.ppf((1 - CL) / 2, DF))
    mean = np.mean(sample)
    half_width = t_crit * np.std(sample, ddof=1) / np.sqrt(n)
    return mean, mean + half_width, mean - half_width

# q1 - crude method

In [4]:
# Crude Monte Carlo: evaluate e^U at 100 independent U ~ Uniform(0, 1);
# the sample mean estimates the integral of e^x over [0, 1].
us = np.random.uniform(low=0, high=1, size=100)
exp = np.exp(us)
print(exp.var())
print(getCI(exp))

0.2544481085644955
(1.7304628428789217, 1.8310566330317244, 1.629869052726119)


# q2 - antithetic variables

In [18]:
# Antithetic variates: each draw U is paired with 1 - U. Because
# e^(1 - U) = e / e^U, the partner value needs no second call to np.exp.
us = np.random.uniform(low=0, high=1, size=100)
exp = np.exp(us)
y = 0.5 * (exp + np.e / exp)
print(y.var())
print(getCI(y))

0.003971109344100165
(1.7178316304889574, 1.7303985108636029, 1.705264750114312)


# q3 - control variates

In [19]:
# Control variate: U itself, whose mean 1/2 is known exactly.
us = np.random.uniform(0,1, size = 100)
exp = np.exp(us)
# np.cov normalises by n - 1 by default, so the variance in the denominator
# must use ddof=1 too; mixing it with the ddof=0 default of np.var scales
# the coefficient by n / (n - 1).
c = -np.cov(exp, us)[0,1]/np.var(us, ddof = 1)
z = exp + c*(us - 1/2)
print(np.var(z))
print(getCI(z))

0.004015359751945773
(1.724430460524101, 1.7370671638332198, 1.7117937572149822)


# q4 - stratified sampling

In [20]:
# Stratified sampling: [0, 1] is cut into 10 equal strata and one point is
# drawn per stratum, (j + U_j) / 10 for j = 0..9. Each repetition averages
# e^x over its 10 points; 10 repetitions give the sample passed to getCI.
strata = np.arange(10)
w = []
for i in range(10):
    us = np.random.uniform(0, 1, 10)
    w.append(np.sum(np.exp((strata + us) / 10)) / 10)

print(np.var(w))
print(getCI(w))

0.00013662900096104535
(1.7164159797327343, 1.7252299781012836, 1.707601981364185)


# q5 - control variates

The control variate is the mean inter-arrival time observed in each run; its expected value (`mean_tbc`) is known.

In [21]:
def simulate_queue(nserver, customers, mean_st, mean_tbc):
    """Simulate a blocking system with exponential arrivals and service times.

    Each arriving customer is offered to the server that becomes free
    earliest. If even that server is still busy at the arrival instant, the
    customer is counted as blocked and draws no service time.

    Parameters
    ----------
    nserver : int
        Number of servers.
    customers : int
        Number of arrivals to simulate.
    mean_st : float
        Mean of the exponential service time.
    mean_tbc : float
        Mean of the exponential time between consecutive arrivals.

    Returns
    -------
    tuple
        ``(blocked / customers, mean inter-arrival time over the run)``.
    """
    # server_time[k] holds the instant at which server k becomes free.
    server_time = np.zeros(nserver)
    time = 0.0  # clock; equals the sum of all inter-arrival times so far
    blocked = 0
    for _ in range(customers):
        # Omitting ``size`` yields a scalar, so no size-1 array is ever written
        # into a single slot of ``server_time`` (newer NumPy versions deprecate
        # that implicit array-to-scalar conversion).
        time += stats.expon.rvs(scale=mean_tbc)
        idx_min_server = np.argmin(server_time)
        if time < server_time[idx_min_server]:
            blocked += 1
        else:
            server_time[idx_min_server] = time + stats.expon.rvs(scale=mean_st)

    return blocked / customers, time / customers

In [22]:
nserver = 10
mean_st = 8
mean_tbc = 1
customers = 10000
nsim = 10
runs = []
arrivals = []
for i in range(nsim):
    blocked, arrival = simulate_queue(nserver, customers, mean_st, mean_tbc)
    runs.append(blocked)
    arrivals.append(arrival)

runs = np.array(runs)
arrivals = np.array(arrivals)
# np.cov normalises by n - 1 by default, so the variance in the denominator
# must use ddof=1 too; with the ddof=0 default of np.var the coefficient
# would be scaled by n / (n - 1), which is far from negligible for nsim = 10.
c = -np.cov(runs, arrivals)[0,1]/np.var(arrivals, ddof = 1)
# The expected mean inter-arrival time is mean_tbc, so the correction term
# has zero mean and leaves the expectation of the estimator unchanged.
z = runs + c*(arrivals - mean_tbc)
print(getCI(z))
print(getCI(runs))
# np.cov of a 1-D sample is its (ddof=1) variance: raw vs. controlled estimator.
print(np.cov(runs), np.cov(z))

(0.11989798911541467, 0.12168346526458784, 0.1181125129662415)
(0.11982000000000001, 0.12178887107092433, 0.11785112892907569)
7.575111111111105e-06 6.229634374080824e-06


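The variance is indeed reduced: in the run shown above, the control-variate estimator `z` has a smaller sample variance than the raw blocking fractions `runs` (about 6.2e-06 versus 7.6e-06).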

# q6 - common random numbers

In [9]:
def getExp(lam, us):
    """Turn uniform draws into exponential variates by inverse transform.

    Evaluates ``-log(us) / lam``, so ``lam`` acts as the rate of the
    resulting distribution when ``us`` holds Uniform(0, 1) samples.
    """
    return np.negative(np.log(us)) / lam

def getHyperExp(p, lam1, lam2, u1, u2):
    """Build two-branch hyperexponential variates from two uniform arrays.

    ``u2`` picks the branch element-wise: entries with ``u2 <= p`` are
    transformed with rate ``lam1``, entries with ``u2 > p`` with rate
    ``lam2``. ``u1`` supplies the uniform that is actually inverted.
    Returns a float array of length ``len(u1)``.
    """
    first_branch = u2 <= p
    second_branch = u2 > p
    out = np.zeros(len(u1))
    out[first_branch] = getExp(lam=lam1, us=u1[first_branch])
    out[second_branch] = getExp(lam=lam2, us=u1[second_branch])
    return out

def simulate_queue_q2(nserver, customers, mean_st, mean_tbc, type = 'Exp', seed = 0):
    """Simulate the blocking system with seeded, reusable arrival uniforms.

    Seeding NumPy's global generator and drawing both uniform arrays up front
    means that two calls with the same ``seed`` but different ``type`` build
    their inter-arrival times from the same uniforms (common random numbers).

    Parameters
    ----------
    nserver : int
        Number of servers.
    customers : int
        Number of arrivals to simulate.
    mean_st : float
        Mean of the exponential service time.
    mean_tbc : float
        Mean time between arrivals; used by the ``'Exp'`` variant only.
    type : {'Exp', 'Hyp'}
        Inter-arrival distribution. ``'Hyp'`` uses fixed hyperexponential
        parameters (p=0.8, rates 0.8333 and 5) and ignores ``mean_tbc``.
    seed : int
        Seed passed to ``np.random.seed`` (this resets the global generator).

    Returns
    -------
    float
        Fraction of customers that were blocked.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'Exp'`` nor ``'Hyp'``.
    """
    if type not in ('Exp', 'Hyp'):
        raise ValueError("type must be 'Exp' or 'Hyp', got %r" % (type,))

    np.random.seed(seed)
    u1 = np.random.uniform(0, 1, customers)
    # u2 is drawn for 'Exp' as well so that both variants leave the generator
    # in the same state before the first service time is sampled.
    u2 = np.random.uniform(0, 1, customers)

    if type == 'Exp':
        # getExp expects a rate, so the mean has to be inverted; passing the
        # mean directly is only correct when mean_tbc == 1.
        arrival_times = getExp(lam = 1 / mean_tbc, us = u1)
    else:
        arrival_times = getHyperExp(0.8, 0.8333, 5, u1, u2)

    # server_time[k] holds the instant at which server k becomes free.
    server_time = np.zeros(nserver)
    time = 0
    blocked = 0
    for i in range(customers):
        time += arrival_times[i]
        idx_min_server = np.argmin(server_time)
        if time < server_time[idx_min_server]:
            blocked += 1
        else:
            # NOTE: service times are drawn only for accepted customers, so the
            # service-time streams of two variants can drift apart once their
            # blocking patterns differ.
            # Omitting ``size`` yields a scalar rather than a size-1 array.
            server_time[idx_min_server] = time + stats.expon.rvs(scale = mean_st)

    return blocked/customers

nserver = 10
mean_st = 8
mean_tbc = 1
customers = 10000
nsim = 10
runs = []
# Run index i doubles as the seed, so the 'Hyp' and 'Exp' simulation of each
# pair start from the same random numbers.
for i in range(nsim):
    runs.append([
        simulate_queue_q2(nserver, customers, mean_st, mean_tbc, 'Hyp', i),
        simulate_queue_q2(nserver, customers, mean_st, mean_tbc, 'Exp', i),
    ])

runs = np.array(runs)
diff = runs[:,0] - runs[:,1]
# Paired t-test from scipy next to the hand-computed statistic
# mean(diff) * sqrt(nsim) / std(diff); the sample size now follows nsim
# instead of a hard-coded 10.
stats.ttest_rel(runs[:,0], runs[:,1]), np.mean(diff)*np.sqrt(nsim)/np.std(diff, ddof = 1)

(Ttest_relResult(statistic=16.68412262034125, pvalue=4.462657602263569e-08),
 16.684122620341252)

# q7 - Monte Carlo on a standard normal random variable

Crude method

In [None]:
# Crude Monte Carlo for P(Z > a), Z standard normal: the estimate is the
# fraction of draws that exceed the threshold.
a = 2
tot = 1000000
values = a < np.random.randn(tot)
res = values.mean()
print(getCI(values))
# Second number printed: reference value from the normal CDF.
print(res, 1 - stats.norm.cdf(a))

(0.022609, 0.02290035580710304, 0.02231764419289696)
0.022609 0.02275013194817921


Importance sampling

Importance sampling reduces the number of samples required to estimate the probability.

In [None]:
# Importance sampling for P(Z > a): draw from a normal centred on the
# threshold and re-weight each draw by the density ratio f / g.
a = 2
s = 1
tot = 10000

proposal = stats.norm(loc=a, scale=s)
samples = proposal.rvs(size=tot)

h = samples > a                # indicator of the event of interest
f = stats.norm.pdf(samples)    # target density: standard normal
g = proposal.pdf(samples)      # density the samples were drawn from

Z = h * f / g
res = Z.mean()

# The confidence interval of the weighted sample is left disabled:
#print(getCI(Z))
print(res, 1 - stats.norm.cdf(a))

0.022382897015648693 0.02275013194817921


# q8 - exponential importance sampling

In [None]:
# Importance sampling for the integral of e^x over [0, 1] with an
# exponential proposal of rate lam; draws outside [0, 1] get weight zero.
lam = 1.35483
size = 100000
values = stats.expon.rvs(scale=1 / lam, size=size)
f = (values >= 0) & (values <= 1)    # indicator of the integration range
h = np.exp(values)                   # integrand
g = lam * np.exp(-lam * values)      # proposal density at the draws

res = f * h / g

# Author's note: equal to the analytical solution, which was worked out on
# paper (the derivation is not part of this notebook).
res.var(), res.mean()


(3.135220056680228, 1.7224825530121732)

# q9 - pareto

In [None]:
# Mean of a Pareto(k) variable by importance sampling from Pareto(k - 1).
# With scipy's density b / x**(b + 1), the weight x * f(x) / g(x) reduces to
# k / (k - 1) for every x, so the estimate matches the exact mean up to
# rounding regardless of the draws.
k = 1.05
size = 10000
target = stats.pareto(k)
proposal = stats.pareto(k - 1)
values = proposal.rvs(size=size)

h = values
f = target.pdf(values)
g = proposal.pdf(values)

res = h * f / g
res.mean(), k / (k - 1)

(21.000000000000075, 20.999999999999982)