In [None]:
# Optional: list the NVIDIA GPUs visible on this machine and their current load.
# Ignore this cell when running on colab
!nvidia-smi

In [None]:
# Skip this cell on colab as well.
# Restrict this process to GPU 0. The variable is set before TensorFlow is
# imported below so that it is already in place when devices are looked up.
import os

os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [None]:
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt 

In [None]:
# PART 1

# Fix TensorFlow's global seed so that the random ops below give the same
# results after a kernel restart. Change SEED to explore other random draws.
SEED = 0
tf.random.set_seed(SEED)

# define random stochastic matrix
# NOTE: in principle it would be interesting how stuff behaves
# as n becomes larger. but then the random matrix will become
# more and more uniform, making it quite boring :(
# so the number of states is kept rather small.
n_states = 20
n_steps = 10
log_transition_matrix = tf.random.normal([n_states, n_states])
# softmax over axis 0 normalizes every COLUMN to sum to 1, i.e. column j holds
# the probabilities of moving from state j to each other state. This is the
# convention needed for the update v' = A v used below.
transition_matrix = tf.nn.softmax(log_transition_matrix, axis=0)

In [None]:
# Power iteration: start from a random distribution v0 and keep applying A.
# After a few iterations v should barely change any more, no matter which
# v0 we started from.
# bonus: compute eigenvectors/values of A; compare to v

v_start = tf.nn.softmax(tf.random.normal([n_states, 1]), axis=0)

v_new = v_start
diffs = []
for iteration in range(n_steps):
    # right-hand side is evaluated first, so the matmul still sees the old v
    v_old, v_new = v_new, tf.matmul(transition_matrix, v_new)
    # squared Euclidean distance between consecutive iterates
    diffs.append(np.square((v_new - v_old).numpy()).sum())

# the curve drops to (almost) zero very quickly -> v has converged
plt.plot(range(1, len(diffs) + 1), diffs)
plt.xlabel("Steps")
plt.ylabel("Change in v")
plt.show()

for heading, value in (("v0", v_start), ("\n A", transition_matrix), ("\n v'", v_new)):
    print(heading)
    print(value)

In [None]:
# The stationary distribution is the eigenvector of A belonging to eigenvalue 1.
# np.linalg.eig does NOT return the eigenvalues in any particular order, so we
# cannot simply take column 0. For a stochastic matrix all eigenvalues satisfy
# |lambda| <= 1, hence eigenvalue 1 is the one with the largest real part.
eigenvalues, eigenvectors = np.linalg.eig(np.asarray(transition_matrix))
dominant_index = np.argmax(eigenvalues.real)
eigenvector = eigenvectors[:, dominant_index]
# eigenvectors are only defined up to a scale factor -> rescale to sum to 1
# so the result is comparable to v' from the cell above
print((eigenvector / eigenvector.sum()).real)

In [None]:
# Simulate the Markov chain itself and record every visited state.
# Starting in state 0 is an arbitrary choice; any state (or a randomly
# sampled one) would do.

n_steps = 10000
x = 0

all_samples = []
for st in range(n_steps):
    # Column x of the logit matrix describes the transitions out of the
    # current state; tf.random.categorical wants a [batch, classes] input,
    # so a leading batch axis of size 1 is added.
    outgoing_logits = log_transition_matrix[:, x][tf.newaxis, :]
    x = tf.random.categorical(outgoing_logits, 1)[0, 0]
    all_samples.append(x.numpy())

In [None]:
# blue bars: empirical distribution
# orange stars: distribution via A*v
# The predicted probabilities are scaled by the number of samples actually
# drawn (not by the global n_steps, which other cells reassign), so the stars
# stay comparable to the histogram counts even if cells are re-run later.
n_chain_samples = len(all_samples)
plt.hist(all_samples, bins=np.linspace(-0.5, n_states-0.5, n_states+1), rwidth=0.9,
         label="Markov chain samples")
plt.plot(v_new.numpy().ravel() * n_chain_samples, "*", label="A*v prediction")
plt.xlabel("State index")
plt.ylabel("Occurrences")
plt.legend()
plt.show()

In [None]:
### PART 2
import tensorflow_probability as tfp
tfd = tfp.distributions

# loc is the most important parameter! rerun this multiple times with different values,
# e.g. start with 0.1, try 1., 3., 5...
# FYI tfp might crash if you provide an integer, so always make it a float
# (note the trailing dot below).

location = 3.
scale = 1.
sample = np.array([0.,0.])  # initial sample -- arbitrary
# "balanced" Gaussian mixture with modes at (-location, -location) and
# (location, location).
# scale_diag is used instead of scale_identity_multiplier: the latter is
# deprecated in TFP and newer releases no longer accept it.
basic_distribution = tfd.MixtureSameFamily(
    tfd.Categorical(probs=[0.5, 0.5]),
    tfd.MultivariateNormalDiag(loc=[[-location, -location], [location, location]],
                               scale_diag=[scale, scale]))


def sample_conditional(other_coordinate, location, scale):
    """Draw one coordinate of the mixture given the value of the other one.

    Both mixture components are isotropic, so conditioning on one coordinate
    leaves the 1D component normals unchanged (means -location / location) and
    only changes the mixture coefficients: component k gets a weight
    proportional to Normal(mean_k, scale).prob(other_coordinate).

    The weights are passed as logits (log-densities) so that the normalization
    happens in log space; dividing raw densities yields 0/0 = NaN once both
    densities underflow.

    Args:
        other_coordinate: current value of the coordinate that is held fixed.
        location: absolute value of the component means (float).
        scale: standard deviation of the components (float).

    Returns:
        The newly sampled coordinate as a NumPy scalar.
    """
    component_means = [-location, location]
    mixing_logits = tfd.Normal(loc=component_means, scale=scale).log_prob(other_coordinate)
    # note that we are now using a 1D normal because we are only sampling one variable
    conditional_distribution = tfd.MixtureSameFamily(
        tfd.Categorical(logits=mixing_logits),
        tfd.Normal(loc=component_means, scale=scale))
    return conditional_distribution.sample().numpy()


n_steps = 1000
samples = [sample]
for step in range(n_steps):
    # copy so that the previous sample stored in `samples` is not overwritten
    sample = sample.copy()
    # sample new x1 (given OLD x2)
    sample[0] = sample_conditional(sample[1], location, scale)
    # sample new x2 given the NEW x1!!
    sample[1] = sample_conditional(sample[0], location, scale)
    samples.append(sample)
samples = np.array(samples)

In [None]:
# Evaluation grid for the contour plot: it covers [-lim, lim]^2, where lim
# reaches out to twice the mode location but never shrinks below 4.
lim = max(2 * location, 4)
grid_points = np.linspace(-lim, lim, 1000)
grid_x, grid_y = np.meshgrid(grid_points, grid_points)
# last axis holds the (x, y) coordinates of each grid point
grid = np.dstack((grid_x, grid_y))

In [None]:
# contour lines: the real distribution
# black dots: the samples
# coloured lines: the steps between consecutive samples; the earliest steps
# are drawn in orange and the colour turns more and more blue over the
# course of sampling

plt.figure(figsize=(10, 10))
plt.contour(grid_x, grid_y, basic_distribution.prob(grid))

# one (r, g, b) colour per sample index, fading from orange to blue
step_colors = [(1 - t, 0.5, t) for t in np.linspace(0, 1, len(samples))]
for start, step_color in zip(range(len(samples) - 1), step_colors):
    segment = samples[start:start + 2]
    plt.plot(segment[:, 0], segment[:, 1], color=step_color, alpha=0.3)

plt.plot(samples[:, 0], samples[:, 1], ".", c="black", alpha=0.5)
plt.show()

In [None]:
# A 2D histogram (heat map) makes it easier to judge whether both modes
# have been visited equally often.
first_coordinate, second_coordinate = samples.T
plt.figure(figsize=(10, 10))
plt.hist2d(first_coordinate, second_coordinate, bins=30)
plt.show()

# bonus: a proper statistical test would be the cleaner way to do this,
# i.e. check whether the number of samples per quadrant differs significantly
# from what we would expect (should be about equal)

In [None]:
# a simple count of the samples around each of the two modes:
# quadrant1 = both coordinates non-negative, quadrant4 = both negative
first_coordinate, second_coordinate = samples[:, 0], samples[:, 1]
quadrant1 = np.count_nonzero((first_coordinate >= 0) & (second_coordinate >= 0))
quadrant4 = np.count_nonzero((first_coordinate < 0) & (second_coordinate < 0))

print(quadrant1, quadrant4)

In [None]:
# Optional Part 3 -- Importance Sampling
# we sample from q, but try to estimate the average norm for samples from p
# (here called basic_distribution).

# for this we use the "correction factor" p(x) / q(x) in importance sampling.

# try this for different locations again and note how the estimate
# breaks down as p moves away from q.

def try_importance_sampling(n_samples, location, scale=1.):
    """Compare a direct Monte Carlo estimate of E_p[||x||] with importance sampling.

    p is the balanced two-component Gaussian mixture with means
    (-location, -location) and (location, location); q is a 2D normal with
    zero mean and unit scale. Two running estimates are plotted against the
    number of samples used:

    * "Real samples": running mean of ||x|| for x drawn from p.
    * "Importance sampling": running mean of ||x|| * p(x) / q(x) for x drawn
      from q.

    All samples are drawn in one batched call instead of one at a time in a
    Python loop, so no progress messages are printed.

    Args:
        n_samples: number of samples drawn from each of p and q.
        location: absolute value of the mixture means (converted to float).
        scale: standard deviation of the mixture components.

    Returns:
        None. The plot is shown as a side effect.
    """
    location = float(location)
    scale = float(scale)
    # scale_diag replaces scale_identity_multiplier, which is deprecated in
    # TFP and no longer accepted by newer releases.
    q_distribution = tfd.MultivariateNormalDiag(loc=[0., 0.], scale_diag=[1., 1.])
    basic_distribution = tfd.MixtureSameFamily(
        tfd.Categorical(probs=[0.5, 0.5]),
        tfd.MultivariateNormalDiag(loc=[[-location, -location], [location, location]],
                                   scale_diag=[scale, scale]))

    # direct estimate: norms of samples from p
    true_samples = basic_distribution.sample(n_samples)
    true_norms = tf.sqrt(tf.reduce_sum(true_samples**2, axis=-1)).numpy()

    # importance sampling: norms of samples from q, reweighted by p(x) / q(x)
    fake_samples = q_distribution.sample(n_samples)
    importance_weights = basic_distribution.prob(fake_samples) / q_distribution.prob(fake_samples)
    fake_norms = (tf.sqrt(tf.reduce_sum(fake_samples**2, axis=-1)) * importance_weights).numpy()

    # running means: cumulative sum divided by the number of samples so far
    sample_counts = np.arange(1, len(true_norms) + 1)
    cumulative_norm_sum = np.cumsum(true_norms, dtype=np.float64) / sample_counts
    cumulative_norm_sum_fake = np.cumsum(fake_norms, dtype=np.float64) / sample_counts

    plt.figure(figsize=(16,8))
    plt.plot(cumulative_norm_sum, label="Real samples")
    plt.plot(cumulative_norm_sum_fake, label="Importance sampling")
    plt.xlabel("Number of samples")
    plt.ylabel("Estimate")
    plt.legend(loc="lower right")
    plt.show()

In [None]:
# mixture means at +-0.1: p lies almost on top of q
try_importance_sampling(n_samples=5000, location=0.1)

In [None]:
# mixture means at +-0.5
try_importance_sampling(n_samples=5000, location=0.5)

In [None]:
# mixture means at +-1
try_importance_sampling(n_samples=5000, location=1.)

In [None]:
# mixture means at +-2
try_importance_sampling(n_samples=5000, location=2.)

In [None]:
# mixture means at +-5: p is far away from q, expect the estimate to break down
try_importance_sampling(n_samples=5000, location=5.)

In [None]:
# FOR ASSIGNMENT 3
# simple way to get samples from a bernoulli distribution:
# build the distribution once (here: 1 with probability 0.5), then call
# .sample() on it. Without arguments a single sample is drawn.
distribution = tfd.Bernoulli(probs=0.5)
distribution.sample()

In [None]:
# .sample() also accepts a shape, which yields that many independent samples
batch_size = 4
n_v = 3

# result has shape (batch_size, n_v); every entry is 1 with probability 0.5
distribution.sample(sample_shape=(batch_size, n_v))

In [None]:
# A "multi-dimensional distribution": one Bernoulli per entry, each with its
# own probability.

# random logits squashed into (0, 1) -- we could pretend that these are
# RBM outputs
random_logits = tf.random.normal(shape=(batch_size, n_v))
probs_v_given_h = tf.math.sigmoid(random_logits)

# same shape as above, however each sample is 1 with a different probability!
distribution = tfd.Bernoulli(probs=probs_v_given_h)
distribution.sample()

In [None]:
# these are the corresponding probabilities: entry (i, j) is the probability
# that entry (i, j) of a sample from the distribution above is 1
probs_v_given_h