In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats
import pandas as pd
from tqdm import tqdm

# Problem 1

## 1.b

In [None]:
N = 10000  # number of draws from the proposal

# Target is uniform on [0, 4]; the proposal is a standard normal.
target = scipy.stats.uniform(loc=0, scale=4)
proposal = scipy.stats.norm(loc=0, scale=1)

# Draw from the proposal, then weight each draw by target density / proposal density.
samples = proposal.rvs(size=N)
weights = np.divide(target.pdf(samples), proposal.pdf(samples))

In [None]:
# Importance-weighted histogram: each bar is the sum of the weights of the draws in that bin.
plt.hist(samples, bins=100, weights=weights)
plt.xlabel("x")
plt.ylabel("summed importance weight")
plt.title("Weighted proposal draws: proposal N(0, 1), target U(0, 4)");

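The proposal $\mathcal{N}(0, 1)$ is a poor match for the target $\mathcal{U}(0, 4)$: it has very low density for $x > 3$, so that region is covered by only a few samples with very large weights, and it puts about half of its mass on $x < 0$, where the target density is zero and the samples are wasted.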

In [None]:
N = 10000  # number of draws from the proposal

# Same uniform target on [0, 4], now with a normal proposal centred at 2.
target = scipy.stats.uniform(loc=0, scale=4)
proposal = scipy.stats.norm(loc=2, scale=1)

# Draw from the proposal, then weight each draw by target density / proposal density.
samples = proposal.rvs(size=N)
weights = np.divide(target.pdf(samples), proposal.pdf(samples))

In [None]:
# Importance-weighted histogram: each bar is the sum of the weights of the draws in that bin.
plt.hist(samples, bins=100, weights=weights)
plt.xlabel("x")
plt.ylabel("summed importance weight")
plt.title("Weighted proposal draws: proposal N(2, 1), target U(0, 4)");

## 1.c

In [None]:
N = 10000  # number of draws from the proposal

target = scipy.stats.uniform(loc=0, scale=4)
proposal = scipy.stats.norm(loc=2, scale=1)

samples = proposal.rvs(size=N)
weights = np.divide(target.pdf(samples), proposal.pdf(samples))

# Importance-sampling estimate of the target mean: average of weight * sample.
(weights * samples).mean()

In [None]:
# Convergence of the importance-sampling estimate of the target mean as the
# number of proposal draws grows.
proposal = scipy.stats.norm(2, 1)
target = scipy.stats.uniform(0, 4)

all_N = list(range(10, 10000, 10))
all_weights = []  # one estimate of the target mean per sample size
for n_samples in all_N:  # dedicated loop variable so the global N is not overwritten
    samples = proposal.rvs(size=n_samples)
    weights = target.pdf(samples) / proposal.pdf(samples)
    all_weights.append(np.mean(weights * samples))

plt.plot(all_N, all_weights, label="importance-sampling estimate")
plt.axhline(2, color="k", linestyle="--", label="true mean of U(0, 4)")
plt.xlabel("number of samples N")
plt.ylabel("estimated mean")
plt.title("Importance-sampling estimate of the target mean")
plt.legend();

## 1.d

$$
\begin{align}
Z &= \int \tilde{\pi}(x) \, dx \\
&= \int q(x)\frac{\tilde{\pi}(x)}{q(x)} \, dx \\
&= \mathbb{E}_q \left[ \frac{\tilde{\pi}(x)}{q(x)} \right] \\
&\approx \frac{1}{N} \sum_{i=1}^N \frac{\tilde{\pi}(x^i)}{q(x^i)}, \qquad x^i \sim q \\
&= \frac{1}{N} \sum_{i=1}^N \tilde{W}^i =: \widehat{Z}
\end{align}
$$

## 1.e

In [None]:
N = 10000


def target_unnorm(x, a=0, b=4):
    """Unnormalised uniform target: indicator of the interval ``[a, b]``.

    Parameters
    ----------
    x : numpy.ndarray
        Points to evaluate. 1-D input is treated element-wise; for 2-D input
        each row is reduced to a single value.
    a, b : float
        Inclusive lower and upper bounds of the interval.

    Returns
    -------
    numpy.ndarray
        For 1-D input, an array like ``x`` that is 1 where ``a <= x <= b`` and
        0 elsewhere. For 2-D input, a float vector with one entry per row that
        is 1.0 when at least one coordinate of the row lies in ``[a, b]``.

    Raises
    ------
    ValueError
        If ``x`` has more than two dimensions.
    """
    out = np.zeros_like(x)
    idx = (a <= x) * (x <= b)
    out[idx] = 1
    if out.ndim == 2:
        # `np.float` was removed in NumPy 1.24; the builtin `float` gives the same dtype.
        out = (out.sum(axis=1) > 0).astype(float)
    elif out.ndim > 2:
        raise ValueError("Dimension > 2")
    return out

# Only the last assignment was ever used; the other proposals are kept as
# commented-out alternatives to experiment with.
# proposal = scipy.stats.norm(0, 1)
# proposal = scipy.stats.norm(2, 1)
proposal = scipy.stats.uniform(0, 4)

samples = proposal.rvs(size=N)
weights_unnorm = target_unnorm(samples) / proposal.pdf(samples)

# Estimate of the normalising constant Z: the average unnormalised weight.
np.mean(weights_unnorm)


In [None]:
# Convergence of the estimate of the normalising constant Z with the sample size.
# Alternative proposals to try:
# proposal = scipy.stats.norm(0, 1)
# proposal = scipy.stats.uniform(0, 4)
proposal = scipy.stats.norm(2, 1)

all_N = list(range(10, 10000, 10))
all_weights = []  # one estimate of Z per sample size
for n_samples in all_N:  # dedicated loop variable so the global N is not overwritten
    samples = proposal.rvs(size=n_samples)
    weights_unnorm = target_unnorm(samples) / proposal.pdf(samples)
    all_weights.append(np.mean(weights_unnorm))

plt.plot(all_N, all_weights)
plt.xlabel("number of samples N")
plt.ylabel("estimate of Z")
plt.title("Importance-sampling estimate of the normalising constant");

## 1.f

This is self-normalized importance sampling. We normalize the weights.

In [None]:
# Self-normalised importance-sampling estimate of the target mean, computed as
# (mean of weight * sample) / (mean of weight), for increasing sample sizes.
proposal = scipy.stats.norm(0, 3)

all_N = list(range(1, 1000, 1))
all_est = []
for n_samples in all_N:  # dedicated loop variable so the global N is not overwritten
    samples = proposal.rvs(size=n_samples)
    weights_unnorm = target_unnorm(samples) / proposal.pdf(samples)
    Z_hat = np.mean(weights_unnorm)
    # If every weight is zero the estimate is 0/0; record NaN explicitly
    # instead of triggering a floating-point warning.
    varphi = np.mean(weights_unnorm * samples) / Z_hat if Z_hat > 0 else np.nan
    all_est.append(varphi)

plt.plot(all_N, all_est)
plt.xlabel("number of samples N")
plt.ylabel("self-normalised estimate of the mean")
plt.title("Self-normalised importance sampling, proposal N(0, 3)");


In [None]:
# Showing that the self-normalized importance sampler is biased (with an asymmetric proposal)

proposal = scipy.stats.norm(0, 3)
n_replicates = 1000  # independent repetitions averaged for every sample size

all_N = list(range(1, 100))
all_est = []
for n_samples in tqdm(all_N):  # dedicated loop variable so the global N is not overwritten
    replicate_est = []
    for _ in range(n_replicates):
        samples = proposal.rvs(size=n_samples)
        weights_unnorm = target_unnorm(samples) / proposal.pdf(samples)
        Z_hat = np.mean(weights_unnorm)
        # If every weight is zero the estimate is 0/0 and therefore undefined.
        varphi = np.mean(weights_unnorm * samples) / Z_hat if Z_hat > 0 else np.nan
        replicate_est.append(varphi)

    # Average over the replicates to remove Monte Carlo variance. nanmean skips
    # the undefined replicates; a plain mean would turn the whole entry into NaN
    # and blank out the small-N part of the curve.
    all_est.append(np.nanmean(replicate_est))

plt.plot(all_N, all_est)
plt.xlabel("number of samples N")
plt.ylabel(f"mean of {n_replicates} self-normalised estimates")
plt.title("Average self-normalised estimate vs. sample size");


## 1.g

In [None]:
# more efficient/direct way of computing: normalise the weights once, then take
# the weighted sum of the samples

proposal = scipy.stats.norm(0, 3)

all_N = list(range(1, 1000, 1))
all_est = []
for n_samples in all_N:  # dedicated loop variable so the global N is not overwritten
    samples = proposal.rvs(size=n_samples)
    weights_unnorm = target_unnorm(samples) / proposal.pdf(samples)
    total_weight = np.sum(weights_unnorm)
    if total_weight > 0:
        weights = weights_unnorm / total_weight
        varphi = np.sum(weights * samples)
    else:
        # Every weight is zero, so the weights cannot be normalised.
        varphi = np.nan
    all_est.append(varphi)

plt.plot(all_N, all_est)
plt.xlabel("number of samples N")
plt.ylabel("self-normalised estimate of the mean")
plt.title("Self-normalised importance sampling with normalised weights");

# Problem 2

In [None]:
N = 1000


def multivariate_uniform_pdf(x, a=-0.5, b=0.5):
    """Density of independent uniforms on ``[a, b]`` in every coordinate.

    Parameters
    ----------
    x : iterable
        One entry per sample. Each entry is either a scalar or a 1-D array
        holding the coordinates of that sample.
    a, b : float
        Lower and upper bounds of the interval used for every coordinate.

    Returns
    -------
    list
        One density value per entry of ``x``: the product of the
        per-coordinate uniform densities.
    """
    # scipy parameterises the uniform by (loc, scale) with support
    # [loc, loc + scale], so the scale is the interval width b - a, not b.
    target = scipy.stats.uniform(loc=a, scale=b - a)
    pdf = [np.prod(target.pdf(x_)) for x_ in x]
    return pdf

# For every dimension d, draw N samples from a standard multivariate normal
# proposal and record the fraction of importance weights that are non-zero.
all_weights = []
n_nonzero_weights = []  # fraction (not count) of non-zero weights per dimension

dims = list(range(1, 10, 1))
for d in tqdm(dims):
    proposal = scipy.stats.multivariate_normal([0] * d, 1)
    samples = proposal.rvs(size=N)
    weights = np.asarray(multivariate_uniform_pdf(samples)) / proposal.pdf(samples)

    all_weights.append(weights)

    n_nonzero_weights.append(np.mean(weights > 0))

plt.plot(dims, n_nonzero_weights)
plt.xlabel("dimension d")
plt.ylabel("fraction of non-zero weights")
plt.title("Share of proposal samples with non-zero weight");

# Problem 3

## 3.a

We will get NaN weights because, in high dimensions, the densities involved become so small that they underflow to exactly zero in the given floating point precision. Computing and normalizing the weights then requires dividing by 0, which yields NaN.

In [None]:
N = 10
d = 100


def target_unnorm(x, a=0, b=4):
    """Unnormalised uniform target on the hypercube ``[a, b]^d``.

    1-D input is evaluated element-wise (1 where ``a <= x <= b``, else 0).
    For 2-D input each row is one point: the result is 1 only when every
    coordinate of the row lies in ``[a, b]``, otherwise 0.

    Raises ``ValueError`` for input with more than two dimensions.
    """
    indicator = np.zeros_like(x)
    inside = (a <= x) * (x <= b)
    indicator[inside] = 1

    if indicator.ndim > 2:
        raise ValueError("Dimension > 2")
    if indicator.ndim == 2:
        # Product over the coordinates of each row: hypercube membership.
        indicator = np.prod(indicator, axis=1)
    return indicator


proposal = scipy.stats.multivariate_normal([2] * d, 1)
samples = proposal.rvs(size=N)

# The log-weight is log(target) - log(proposal). target_unnorm returns 0 or 1,
# so its log is -inf outside the support and 0 inside. Subtracting the log-density
# from the raw 0/1 value would give samples outside the support a positive weight.
with np.errstate(divide="ignore"):
    log_weights_unnorm = np.log(target_unnorm(samples)) - proposal.logpdf(samples)

# Naive normalisation: exponentiate first, then divide by the sum.
# This is NaN when the sum is 0 or inf.
weights_unnorm = np.exp(log_weights_unnorm)
weights = weights_unnorm / np.sum(weights_unnorm)

# Self-normalised estimate of the target mean, one value per dimension.
np.sum(weights[:, np.newaxis] * samples, axis=0)

In [None]:
N = 10
d = 1000

proposal = scipy.stats.multivariate_normal([2] * d, 1)
samples = proposal.rvs(size=N)

# The log-weight is log(target) - log(proposal). target_unnorm returns 0 or 1,
# so its log is -inf outside the support and 0 inside.
with np.errstate(divide="ignore"):
    log_target_unnorm = np.log(target_unnorm(samples))
log_weights_unnorm = log_target_unnorm - proposal.logpdf(samples)

max_log_weight = np.max(log_weights_unnorm)
if np.isneginf(max_log_weight):
    # No sample has non-zero target value, so the weights cannot be normalised.
    # NOTE(review): with N = 10 and d = 1000 this is probably the usual outcome - confirm.
    weights = np.full(N, np.nan)
else:
    # Shifting by the maximum makes the largest weight exactly 1, so exp cannot
    # overflow. The shift is a common factor that cancels in the normalisation.
    weights = np.exp(log_weights_unnorm - max_log_weight)
    weights = weights / np.sum(weights)

# Self-normalised estimate of the target mean, one value per dimension.
np.sum(weights[:, np.newaxis] * samples, axis=0)

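This final approach has good numerical stability: after subtracting the maximum log-weight, the largest weight is exactly 1, so the exponential cannot overflow. It is valid because subtraction in log space is equivalent to division by a constant in real space, and a common constant factor does not change the normalized weights.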

# Problem 4

Estimate the marginal filtering distribution at each time index $t = 1, \dots, T$ using the bootstrap particle filter with $N = 500$ particles:

$p(x_t| y_{1:t})$

In [None]:
# Read the CSV once and keep the DataFrame under its own name, so that
# observation_data only ever refers to the NumPy array.
observation_frame = pd.read_csv("./seOMXlogreturns2012to2014.csv")
T = observation_frame.shape[0]  # number of rows = number of time steps

# First column of the table as a 1-D array.
observation_data = observation_frame.to_numpy()[:, 0]
observation_data.shape

In [None]:
# Model parameters used by the particle filter:
#   phi   - multiplies the previous state in the state transition mean
#   sigma - standard deviation of the state transition
#   beta  - scale factor of the observation standard deviation
phi, sigma, beta = 0.98, 0.16, 0.70

# Number of particles.
N = 500


In [None]:
# Bootstrap Particle Filter
# Model as implemented below:
#   x_{t+1} | x_t ~ N(phi * x_t, sigma^2),    y_t | x_t ~ N(0, beta^2 * exp(x_t)).
# Indexing: particles[0] / weights[0] hold the initial particle system (no data
# seen yet); particles[t + 1] / weights[t + 1] hold the particles and normalised
# weights obtained after processing observation_data[t].
initial_particle_dist = scipy.stats.norm(0, 1)
weights = [np.full(N, 1 / N)] + [None] * T
particles = [initial_particle_dist.rvs(N)] + [None] * T  # draw initial particles
mean_observation = [None] * T
prediction = [None] * T
marginal_filtering = [None] * T

for t in tqdm(range(T)):
    # RESAMPLE: draw ancestors with probability given by the current weights
    ancestor_indices = np.random.choice(N, p=weights[t], replace=True, size=N)

    # PROPAGATE
    # state: the bootstrap proposal is the state transition itself
    proposal_dist = scipy.stats.norm(phi * particles[t][ancestor_indices], sigma)
    particles[t + 1] = proposal_dist.rvs(size=N)

    # measurement: one observation density per propagated particle
    measurement_dist = scipy.stats.norm(0, np.sqrt(beta ** 2 * np.exp(particles[t + 1])))
    # one observation simulated at the mean of the propagated particles
    mean_observation[t] = scipy.stats.norm(0, np.sqrt(beta ** 2 * np.exp(np.mean(particles[t + 1])))).rvs()

    # WEIGHT: work in log space and shift by the maximum before exponentiating
    log_weights = measurement_dist.logpdf(observation_data[t])
    shifted_weights = np.exp(log_weights - np.max(log_weights))
    weights[t + 1] = shifted_weights / np.sum(shifted_weights)

    # Summaries for observation_data[t] must use the particle system at index
    # t + 1. Index t is the system from before this observation was processed,
    # so using it shifts every curve by one step and drops the final estimate.
    prediction[t] = np.mean(particles[t + 1])  # before weighting
    marginal_filtering[t] = np.sum(weights[t + 1] * particles[t + 1])  # after weighting

# Drop the initial (pre-data) entry so that row t lines up with observation_data[t].
weights = np.array(weights[1:])
particles = np.array(particles[1:])
mean_observation = np.array(mean_observation)
prediction = np.array(prediction)
marginal_filtering = np.array(marginal_filtering)

In [None]:
# Per-time-step summaries of the stored particle system (axis 1 runs over particles).
mean_particle = particles.mean(axis=1)  # unweighted average
weighted_particle = (weights * particles).sum(axis=1)  # weight-averaged particles

In [None]:
# Observations overlaid with the unweighted particle mean stored in `prediction`.
plt.title("Prediction")
plt.plot(observation_data, label="observations (log-returns)")
plt.plot(prediction, label="prediction (unweighted particle mean)")
plt.xlabel("time index t")
plt.legend();

In [None]:
# Observations overlaid with the weighted particle mean stored in `marginal_filtering`.
plt.title("Marginal filtering")
plt.plot(observation_data, label="observations (log-returns)")
plt.plot(marginal_filtering, label="filtering mean (weighted particle mean)")
plt.xlabel("time index t")
plt.legend();

In [None]:
# Observations overlaid with the observations simulated at the mean particle.
plt.title("Mean observation")
plt.plot(observation_data, label="observations (log-returns)")
plt.plot(mean_observation, label="observation simulated at the mean particle")
plt.xlabel("time index t")
plt.legend();