In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import scipy.integrate as integrate

## 2. Expectations and LotUS

The law of the unconscious statistician (LotUS) states that if $X$ is a real-valued continuous random variable with pdf $p_X$ and $h$ is a function such that ${\rm I\!E}[h(X)]$ exists, then
$${\rm I\!E}[h(X)] = \int_{-\infty}^{+\infty}h(x) p_X(x) \mathrm{d}x.$$
In the multivariate case, where $X$ is an ${\rm I\!R}^n$-valued random vector and $h:{\rm I\!R}^n\to{\rm I\!R}^m$ LotUS becomes
$${\rm I\!E}[h(X)] = \int_{{\rm I\!R}^n}h(x) p_X(x) \mathrm{d}x.$$
These integrals can be computed analytically (quite rarely) or (typically) numerically.

In [8]:
# Example 1: Suppose X ~ N(0, 1) and h(x) = cos(x).
# By LotUS, E[h(X)] is the integral of h(x) * p_X(x) over the real line.
def h(x):
    """Function whose expectation is sought: h(x) = cos(x)."""
    return np.cos(x)


def p_x(x):
    """Pdf of the standard normal distribution."""
    return stats.norm.pdf(x)


def integrand(x):
    """LotUS integrand h(x) * p_X(x)."""
    return h(x) * p_x(x)


# quad returns a tuple whose first two entries are the estimate of the
# integral and an estimate of the absolute error
e1 = integrate.quad(integrand, -np.inf, np.inf)
print(f"The expectation is ≈{e1[0]:.3f} (abs. err: {e1[1]:.2e})")

The expectation is ≈0.607 (abs. err: 9.21e-09)


### 2.1. Exercises

1. Suppose $X\sim\mathcal{N}(0, 1)$. Compute ${\rm I\!E}[X^2]$.
2. Suppose $X\sim\mathcal{N}(0, 1)$. Compute ${\rm I\!E}[\sin(X + 1)]$.
3. Suppose $X\sim\mathrm{Beta}(4, 5)$. Compute ${\rm I\!E}[X1_{\geq 0.2}(X)]$, where $1_{\geq 0.2}$ is the function
$$1_{\geq 0.2}(x) = \begin{cases}1, & \text{ if } x \geq 0.2, \\ 0, & \text{ otherwise}\end{cases}$$

### 2.2. Multivariate case

In the multivariate case we can use `scipy.integrate.nquad`. As an example, suppose $X\sim\mathcal{N}(0, \Sigma)$, where $\Sigma$ is a given symmetric positive definite covariance matrix, and we want to determine ${\rm I\!E}[\|X\|_2^2]$. Recall that $\|X\|_2^2 = X_1^2 + \ldots + X_n^2$. We will give an example with $n = 2$ and you will then generalise it to arbitrary $n$.

**Warning:** Integration in high dimensions is computationally challenging!


In [33]:
mu = [0, 0]
# Let us construct a random symmetric positive definite matrix.
# A seeded local generator is used so that the printed result is reproducible
# on every run (and the global NumPy random state is left untouched).
rng = np.random.default_rng(0)
L = 0.1 * rng.uniform(0, 1, (2, 2))
Sigma = L @ L.T + np.eye(2)  # L L^T is positive semidefinite; adding I makes it positive definite

# Freeze the distribution once so that mu and Sigma are not re-processed at
# every single evaluation of the integrand.
p_X = stats.multivariate_normal(mu, Sigma)


def h(*x):
    """
    LotUS integrand ||x||^2 * p_X(x).

    nquad passes the coordinates of the integration point as separate scalar
    arguments, hence the *x signature.
    """
    return np.sum(np.array(x)**2) * p_X.pdf(x)


M = np.inf # large number (or infinity; may take longer)
# Sanity check: since E[X] = 0, the exact value is trace(Sigma).
norm_squared_expectation = integrate.nquad(h, [[-M, M], [-M, M]])
print(f"Expectation of sq. norm: {norm_squared_expectation[0]:.3f} (abs. err: {norm_squared_expectation[1]:.2e})")

Expectation of sq. norm: 0.962 (abs. err: 1.39e-08)


In [36]:
# Exercise: implement the following function and record how long it takes to run for n = 3
#           (warning: it will take several minutes)
def exp_nrm_sq(n):
    """
    Intended to compute the expectation of ||X||_2^2 for X ~ N(0, Sigma),
    where Sigma is a random n-by-n symmetric positive definite matrix that is
    generated inside the function.

    This is an exercise stub: `h` and `norm_exp` below are placeholders, so
    the function returns 0 until they are implemented. Sigma is drawn from
    NumPy's global random state, so it differs between calls unless that
    state is seeded beforehand.

    :param n: dimension
    :return: expectation of ||X||^2
    """
    mu = np.zeros(n, )
    # Random symmetric positive definite covariance matrix:
    # L L^T is positive semidefinite and adding the identity makes it positive definite
    L = 0.1 * np.random.uniform(0, 1, (n, n))
    Sigma = L @ L.T + np.eye(n)
    h = 0 # Your code goes here
    norm_exp = 0 # Your code goes here
    return norm_exp

print(exp_nrm_sq(3))

4.649513601905538


### 2.3. Monte-Carlo integration*

**Note:** We saw that numerical integration using `nquad` in three dimensions is already quite slow. It is much more efficient to use a Monte-Carlo integration method. We will not go into details, but if you are interested you can play with the following block of code.

In [38]:
import vegas

In [75]:
def exp_nrm_sq_fast(n):
    """
    Monte-Carlo estimate of E[||X||_2^2] for X ~ N(0, Sigma) using vegas.

    Sigma is an n-by-n symmetric positive definite matrix generated from a
    fixed seed, so repeated calls integrate the same problem. The integrand
    ||x||^2 * p_X(x) is integrated over the box [-M, M]^n with M = 100.

    :param n: dimension of the random vector X
    :return: the object returned by the vegas integrator (its `.mean` and
             `.sdev` attributes are used by the caller to report the result)
    """
    # Fixes the random matrix below.
    # NOTE(review): vegas' default sampler presumably also draws from NumPy's
    # global random state, in which case this makes the estimate itself
    # reproducible as well -- confirm for the installed vegas version.
    np.random.seed(0)
    mu = np.zeros(n)
    L = 0.1 * np.random.uniform(0, 1, (n, n))
    Sigma = L @ L.T + np.eye(n)

    # Freeze the distribution once instead of re-processing mu and Sigma at
    # every one of the (millions of) integrand evaluations.
    p_X = stats.multivariate_normal(mean=mu, cov=Sigma)

    def integrand(x):
        # x is the integration point, passed by the integrator as one array of length n
        x = np.asarray(x, dtype=float)
        return np.dot(x, x) * p_X.pdf(x)

    M = 100
    # One [-M, M] interval per coordinate. (`[[-M, M]*n]` would instead build
    # a single list of 2n numbers, which is not an n-dimensional box.)
    integ = vegas.Integrator([[-M, M] for _ in range(n)])
    # NOTE(review): adapt=False asks vegas not to adapt its grid during these
    # iterations; the vegas tutorial typically runs adaptive warm-up
    # iterations first -- consider adding them if the accuracy is poor.
    norm_exp = integ(integrand, nitn=10, neval=200_000, adapt=False)
    # To increase precision we can increase either the number of iterations (nitn)
    # or the number of evaluations per iteration (neval); increasing the latter
    # is more efficient.
    return norm_exp

# Monte-Carlo estimate for n = 3; the returned object is read through its
# .mean (estimate) and .sdev (standard deviation) attributes
result = exp_nrm_sq_fast(n=3)
print(f"Integral = {result.mean:.6f} +/- {result.sdev:.6f}")


Integral = 3.039205 +/- 0.001581
