In [None]:

import numpy as np
import matplotlib.pyplot as plt
import scipy
import scipy.stats
import pandas as pd
from tqdm import tqdm

# Problem 1: Metropolis Hastings Sampler

In [None]:

def mh_correction(current, proposal, proposal_dist):
    """Log Hastings correction for the mean-centred random-walk proposal.

    The sampler proposes ``proposal = current + (draw - proposal_dist.mean())``
    with ``draw ~ proposal_dist``, so the density of moving from ``a`` to ``b``
    is ``proposal_dist.pdf(b - a + proposal_dist.mean())``.

    Parameters
    ----------
    current : float
        State the chain is currently in.
    proposal : float
        Candidate state.
    proposal_dist : frozen scipy.stats distribution
        Must provide ``mean()`` and ``logpdf()``.

    Returns
    -------
    float
        ``log q(current | proposal) - log q(proposal | current)``; zero when
        ``proposal_dist`` is symmetric about its mean.
    """
    centre = proposal_dist.mean()
    log_q_backward = proposal_dist.logpdf(current - proposal + centre)  # proposal -> current
    log_q_forward = proposal_dist.logpdf(proposal - current + centre)   # current -> proposal
    return log_q_backward - log_q_forward


def metropolis_hastings(initial_point, n_samples, proposal_dist, target_dist_log_prob):
    """Random-walk Metropolis-Hastings sampler for a scalar target.

    Parameters
    ----------
    initial_point : float
        Starting state of the chain.
    n_samples : int
        Number of MH iterations to run.
    proposal_dist : frozen scipy.stats distribution
        Step distribution; each draw is re-centred by subtracting its mean so
        the walk does not drift. Must provide ``rvs()``, ``mean()``, ``logpdf()``.
    target_dist_log_prob : callable
        Log of the (possibly unnormalised) target density.

    Returns
    -------
    samples : numpy.ndarray
        ``n_samples + 1`` states, the first being ``initial_point``.
    accept_reject : list of float
        ``1.0`` for each accepted proposal and ``0.0`` for each rejected one.

    Notes
    -----
    The log-probability of the current state is cached and only refreshed on
    acceptance, so the target is evaluated once per iteration instead of twice.
    Randomness comes from numpy's global random state.
    """
    current = initial_point
    curr_log_prob = target_dist_log_prob(current)
    assert not np.isnan(curr_log_prob), "Initial point has NaN log_prob under target distribution"

    step_mean = proposal_dist.mean()  # loop-invariant
    states = [current]
    accept_reject = []

    for _ in range(n_samples):
        # Re-centre the draw so an asymmetric proposal_dist does not make the chain drift.
        proposal = proposal_dist.rvs() + current - step_mean
        correction = mh_correction(current, proposal, proposal_dist)
        prop_log_prob = target_dist_log_prob(proposal)

        # Accept when log(u) < log acceptance ratio, with u ~ U(0, 1).
        # A NaN ratio (e.g. -inf minus -inf) compares False and is therefore rejected.
        acceptance = prop_log_prob - curr_log_prob + correction
        accepted = acceptance > np.log(np.random.uniform(0, 1))
        if accepted:
            current, curr_log_prob = proposal, prop_log_prob
        accept_reject.append(float(accepted))
        states.append(current)

    return np.array(states), accept_reject

In [None]:
# Sampler configuration for Problem 1.
n_samples = 10_000
proposal_std = 1
initial_point = 0

# Zero-mean Gaussian step distribution handed to the sampler.
proposal_dist = scipy.stats.norm(loc=0, scale=proposal_std)


def target_dist_unnorm(x):
    """Unnormalised target density sin(x)^2 * exp(-|x|)."""
    return np.sin(x) ** 2 * np.exp(-np.abs(x))


def target_dist_unnorm_logspace(x):
    """Natural log of ``target_dist_unnorm``."""
    return np.log(target_dist_unnorm(x))


In [None]:
# Run the chain; `samples` holds the visited states, `accept_reject` the per-step accept flags.
samples, accept_reject = metropolis_hastings(
    initial_point=initial_point,
    n_samples=n_samples,
    proposal_dist=proposal_dist,
    target_dist_log_prob=target_dist_unnorm_logspace,
)

In [None]:
# Trace of the first 100 states, to eyeball how quickly the chain leaves its starting point.
fig, ax = plt.subplots()
ax.plot(samples[:100])
ax.set(xlabel="iteration", ylabel="state", title="Metropolis-Hastings trace (first 100 states)");

In [None]:
# Discard the first BURN_IN states as burn-in.
# The chain returned by metropolis_hastings has n_samples + 1 states (initial point included).
# The number of states to keep is derived from that, so re-running this cell is a no-op
# instead of discarding another BURN_IN states each time.
BURN_IN = 100
n_kept = n_samples + 1 - BURN_IN
samples = samples[max(len(samples) - n_kept, 0):]

In [None]:
# Compare the chain's histogram with the target density.
# The histogram is drawn with density=True (area one), so the unnormalised target is rescaled
# by a Riemann-sum estimate of its normalising constant on the plotting grid; otherwise the two
# curves are on different scales.
x = np.linspace(-10, 10, 1000)
target_density = target_dist_unnorm(x)
target_density = target_density / (np.sum(target_density) * (x[1] - x[0]))

fig, ax = plt.subplots()
ax.hist(samples, bins=100, density=True, alpha=0.6, label="MH samples")
ax.plot(x, target_density, label="target (normalised on [-10, 10])")
ax.set(xlabel="x", ylabel="density")
ax.legend();

# Problem 2: Gibbs Sampler

In [None]:
def gibbs_sampler_2d_normal(initial_point, n_samples, means, cov):
    """Gibbs sampler for a bivariate normal distribution.

    Each sweep draws ``x1 | x2`` and then ``x2 | x1`` from the exact Gaussian
    conditionals.

    Parameters
    ----------
    initial_point : float
        Starting value of ``x2`` (``x1`` is drawn first, so it needs no start).
    n_samples : int
        Number of sweeps.
    means : array-like of length 2
        Mean vector ``(mu1, mu2)``.
    cov : numpy.ndarray of shape (2, 2)
        Covariance matrix; indexed as ``cov[i, j]``.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 2)
        One ``(x1, x2)`` pair per sweep.

    Raises
    ------
    ValueError
        If a conditional variance is negative or NaN, i.e. ``cov`` is not a
        valid covariance matrix. (Previously this produced NaN samples.)
    """
    mu1, mu2 = means[0], means[1]
    cov11, cov22 = cov[0, 0], cov[1, 1]
    cov12, cov21 = cov[0, 1], cov[1, 0]

    # The conditional variances and regression slopes do not depend on the state,
    # so they are computed once instead of on every sweep.
    var1_given_2 = cov11 - (cov12 ** 2) / cov22
    var2_given_1 = cov22 - (cov21 ** 2) / cov11
    if not (var1_given_2 >= 0 and var2_given_1 >= 0):
        raise ValueError("cov must be a valid (positive semi-definite) 2x2 covariance matrix")
    std1_given_2 = np.sqrt(var1_given_2)
    std2_given_1 = np.sqrt(var2_given_1)
    slope1 = cov12 / cov22
    slope2 = cov21 / cov11

    samples = np.empty((n_samples, 2))
    x2 = initial_point
    for i in range(n_samples):
        x1 = np.random.normal(mu1 + slope1 * (x2 - mu2), std1_given_2)
        x2 = np.random.normal(mu2 + slope2 * (x1 - mu1), std2_given_1)
        samples[i] = x1, x2

    return samples

In [None]:
# Problem 2 configuration: chain length, starting value, and the target's mean / covariance.
n_samples = 10_000
initial_point = 0
means = np.array([7, 3])
cov = np.array([
    [0.3, 0.5],
    [0.5, 1],
])

In [None]:
# Draw the Gibbs chain; each row of `samples` is one (x1, x2) pair.
samples = gibbs_sampler_2d_normal(
    initial_point=initial_point,
    n_samples=n_samples,
    means=means,
    cov=cov,
)

In [None]:
# Trace of both coordinates over the sweeps.
fig, ax = plt.subplots()
ax.plot(samples[:, 0], label="x1")
ax.plot(samples[:, 1], label="x2")
ax.set(xlabel="iteration", ylabel="value", title="Gibbs sampler trace")
ax.legend();

In [None]:
# 2-D histogram of the Gibbs samples (x1 on the horizontal axis, x2 on the vertical).
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
x1_draws, x2_draws = samples[:, 0], samples[:, 1]
ax.hist2d(x1_draws, x2_draws, bins=50, cmap='viridis');


In [None]:
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
counts, x_bins, y_bins = np.histogram2d(samples[:, 0], samples[:, 1])

# np.histogram2d indexes counts as [x_bin, y_bin], whereas contourf expects Z[row=y, col=x].
# Transpose, and place each cell at its bin centre, so the contours line up with the scatter.
x_centres = 0.5 * (x_bins[:-1] + x_bins[1:])
y_centres = 0.5 * (y_bins[:-1] + y_bins[1:])
ax.contourf(x_centres, y_centres, counts.T)

# Overlay the last 500 draws.
ax.scatter(samples[-500:, 0], samples[-500:, 1], s=1, c='r')
ax.set(xlabel="x1", ylabel="x2");

# Problem 3: Resampling techniques

In [None]:
N = 10000

# Standard-normal draws paired with uniform random weights, normalised to sum to one.
x = np.random.normal(loc=0, scale=1, size=N)
w = np.random.uniform(low=0, high=1, size=N)
w = w / w.sum()

# Plain sample mean next to the weight-averaged mean.
x.mean(), (w * x).sum()

In [None]:
def stratified_resampling(w, x, n_strata=None):
    """Stratified resampling of ``x`` according to weights ``w``.

    The unit interval is split into ``n_strata`` equal strata and one uniform
    point is drawn independently inside each; every point selects the element
    whose cumulative-weight interval contains it.

    Parameters
    ----------
    w : array-like
        Non-negative weights. They need not sum exactly to one: the points are
        scaled by the total weight.
    x : array-like
        Values to resample; same length as ``w``.
    n_strata : int, optional
        Number of draws; defaults to ``len(w)``.

    Returns
    -------
    numpy.ndarray
        ``n_strata`` resampled values.

    Raises
    ------
    ValueError
        If draws are requested but the weights are empty or have a
        non-positive sum.
    """
    weights = np.asarray(w, dtype=float)
    values = np.asarray(x)
    n_strata = len(weights) if n_strata is None else n_strata
    if n_strata == 0:
        return values[:0]

    cumulative = np.cumsum(weights)
    if cumulative.size == 0 or not cumulative[-1] > 0:
        raise ValueError("weights must be non-empty and have a positive sum")

    u = (np.arange(n_strata) + np.random.rand(n_strata)) / n_strata
    # Scale by the total so that rounding in the cumulative sum (or unnormalised weights)
    # cannot push a point past the last bin edge. searchsorted(side="right") matches
    # np.digitize's default; the clip guards against landing exactly on the last edge.
    idx = np.searchsorted(cumulative, u * cumulative[-1], side="right")
    return values[np.minimum(idx, cumulative.size - 1)]


def systematic_resampling(w, x, n_strata=None):
    """Systematic resampling of ``x`` according to weights ``w``.

    A single uniform offset is drawn and reused in each of ``n_strata`` equal
    strata of the unit interval; every resulting point selects the element
    whose cumulative-weight interval contains it.

    Parameters
    ----------
    w : array-like
        Non-negative weights. They need not sum exactly to one: the points are
        scaled by the total weight.
    x : array-like
        Values to resample; same length as ``w``.
    n_strata : int, optional
        Number of draws; defaults to ``len(w)``.

    Returns
    -------
    numpy.ndarray
        ``n_strata`` resampled values.

    Raises
    ------
    ValueError
        If draws are requested but the weights are empty or have a
        non-positive sum.
    """
    weights = np.asarray(w, dtype=float)
    values = np.asarray(x)
    n_strata = len(weights) if n_strata is None else n_strata
    if n_strata == 0:
        return values[:0]

    cumulative = np.cumsum(weights)
    if cumulative.size == 0 or not cumulative[-1] > 0:
        raise ValueError("weights must be non-empty and have a positive sum")

    u = (np.arange(n_strata) + np.random.rand()) / n_strata
    # Scale by the total so that rounding in the cumulative sum (or unnormalised weights)
    # cannot push a point past the last bin edge. searchsorted(side="right") matches
    # np.digitize's default; the clip guards against landing exactly on the last edge.
    idx = np.searchsorted(cumulative, u * cumulative[-1], side="right")
    return values[np.minimum(idx, cumulative.size - 1)]

In [None]:
N = 10000

# One list per estimator; each collects that estimator's value over the repetitions.
mean_x, mean_wx = [], []
mean_multinomial_resampled_x = []
mean_stratified_resampling = []
mean_systematic_resampling = []

for r in tqdm(range(1000)):
    # Fresh values and normalised random weights for this repetition.
    x = np.random.normal(0, 1, size=N)
    w = np.random.uniform(0, 1, size=N)
    w = w / np.sum(w)

    # Resample with the three schemes (multinomial, stratified, systematic), in that order.
    x_resampled = np.random.choice(x, size=N, p=w)
    x_stratified = stratified_resampling(w, x)
    x_systematic = systematic_resampling(w, x)

    # Record the unweighted mean, the weighted mean, and the mean of each resampled set.
    mean_x.append(np.mean(x))
    mean_wx.append(np.sum(w * x))
    mean_multinomial_resampled_x.append(np.mean(x_resampled))
    mean_stratified_resampling.append(np.mean(x_stratified))
    mean_systematic_resampling.append(np.mean(x_systematic))
    

In [None]:
# Variance over repetitions of each estimator, in the order:
# unweighted mean, weighted mean, multinomial, stratified, systematic.
tuple(
    np.var(estimates)
    for estimates in (
        mean_x,
        mean_wx,
        mean_multinomial_resampled_x,
        mean_stratified_resampling,
        mean_systematic_resampling,
    )
)

# Problem 4: Path-space view

In [None]:
# Read the CSV and keep the first 50 rows of its first column as a 1-D array.
observation_data = pd.read_csv("./seOMXlogreturns2012to2014.csv").to_numpy()[:50, 0]

# Number of observations used by the filter below.
T = len(observation_data)
observation_data.shape

In [None]:
# Model parameters read by the particle filter cell:
#   phi   multiplies the previous state in the transition mean,
#   sigma is the transition standard deviation,
#   beta  scales the measurement standard deviation.
phi, sigma, beta = 0.98, 0.16, 0.70

In [None]:

# Bootstrap Particle Filter
# N (number of particles) is whatever an earlier cell last assigned.
# Index convention: particles[0] is the initial draw; at step t the particles in slot t are
# resampled with weights[t], propagated into slot t+1, and weighted with observation_data[t].
initial_particle_dist = scipy.stats.norm(0, 1)
weights = [np.array([1/N] * N)] + [None] * T
particles = [initial_particle_dist.rvs(N)] + [None] * T  # draw initial particles
mean_observation = [None] * T
prediction = [None] * T
marginal_filtering = [None] * T
# ancestor_indices[k][i] is the index in particles[k-1] of the parent of particles[k][i].
# That is the convention backtrack_genealogy and the genealogy plot use when they index this
# array, so the indices drawn at step t are stored in slot t+1. Slot 0 has no parent
# generation and holds the identity as a placeholder.
ancestor_indices = [np.arange(N)] + [None] * T

for t in tqdm(range(T)):
    # RESAMPLE
    a_indices = np.random.choice(N, p=weights[t], replace=True, size=N)
    ancestor_indices[t + 1] = a_indices

    # PROPAGATE
    # state
    proposal_dist = scipy.stats.norm(phi * particles[t][a_indices], sigma)
    particles[t+1] = proposal_dist.rvs()

    # measurement: zero-mean Gaussian with standard deviation sqrt(beta^2 * exp(state))
    measurement_dist = scipy.stats.norm(0, np.sqrt(beta ** 2 * np.exp(particles[t+1])))
    # one random draw from the measurement model evaluated at the mean particle
    mean_observation[t] = scipy.stats.norm(0, np.sqrt(beta ** 2 * np.exp(np.mean(particles[t+1])))).rvs()

    # WEIGHT
    log_weights_unnorm = measurement_dist.logpdf(observation_data[t])
    # subtract the max before exponentiating to avoid underflow; it cancels in the normalisation
    weights_unnorm = np.exp(log_weights_unnorm - np.max(log_weights_unnorm))
    weights[t+1] = weights_unnorm / np.sum(weights_unnorm)

    # unweighted and weight-averaged means of the particles in slot t
    prediction[t] = np.mean(particles[t])
    marginal_filtering[t] = np.sum(weights[t] * particles[t])

# Drop the final slot so every array has T rows.
weights = np.array(weights[:-1])
particles = np.array(particles[:-1])
mean_observation = np.array(mean_observation)
prediction = np.array(prediction)
marginal_filtering = np.array(marginal_filtering)
ancestor_indices = np.array(ancestor_indices[:-1])

In [None]:
# Shapes of the stored ancestor indices and particles.
np.shape(ancestor_indices), np.shape(particles)

In [None]:
import copy


def backtrack_genealogy(list_index, list_sample):
    """Trace the ancestral path of every sample in the last generation.

    ``list_index[k][i]`` is treated as the position in ``list_sample[k - 1]``
    of the parent of ``list_sample[k][i]``; ``list_index[0]`` is never used to
    select samples.

    Parameters
    ----------
    list_index : sequence of integer arrays
        Ancestor indices, one entry per generation.
    list_sample : sequence of arrays
        Samples, one entry per generation.

    Returns
    -------
    numpy.ndarray
        One row per generation (``len(list_index)`` rows when it is non-empty);
        row ``k`` holds, for each sample of the last generation, its ancestor
        in generation ``k``. The inputs are not modified.
    """
    n_generations = len(list_index)
    # Built from the last generation backwards and reversed at the end.
    rows = [np.asarray(list_sample[-1]).reshape(1, -1)]

    # `path` holds, for each final sample, the index of its ancestor one generation back.
    # Carrying only this vector replaces the deep copy of the whole index array.
    path = np.asarray(list_index[-1]) if n_generations else None
    for k in range(n_generations - 1, 0, -1):
        rows.append(np.asarray(list_sample[k - 1])[path].reshape(1, -1))
        path = np.asarray(list_index[k - 1])[path]

    rows.reverse()
    return np.concatenate(rows, axis=0)


In [None]:
# Ancestral paths of the particles in the last stored generation.
genealogy = backtrack_genealogy(
    list_index=ancestor_indices,
    list_sample=particles,
)

In [None]:
# Inputs to the backtracking are unchanged: same shapes as before the call.
np.shape(ancestor_indices), np.shape(particles)

In [None]:
# One row per generation, one column per final particle.
np.shape(genealogy)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(20, 10))

# Ancestral paths of the final particles, raised above the grey links so that they are not
# painted over by lines added later.
ax.plot(np.arange(T), genealogy, marker='o', color='red', zorder=3)

# Grey links: every particle at t+1 joined to its parent at t.
# NOTE(review): this reads ancestor_indices[t+1] as the parents of particles[t+1] — confirm
# that this matches how the filter cell stores ancestor_indices.
for t in range(T - 1):
    p = np.array([particles[t][ancestor_indices[t+1]], particles[t+1]])
    ax.plot([t, t+1], p, marker='o', color='grey', alpha=0.5, zorder=2)

# Markers for all initial particles, including those that are nobody's parent.
ax.plot([0, 0], [particles[0], particles[0]], marker='o', color='grey', alpha=0.5, zorder=2)
ax.set(xlabel="time step", ylabel="particle value");