In [1]:
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline  

import torch
import torch.nn as nn
import torch.optim as optim 

import itertools

from copy import deepcopy

import optimization_lib as optim_lib
import rao_blackwellization_lib as rb_lib
import baselines_lib as bs_lib 

from toy_experiment_lib import ToyExperiment

In [2]:
# Seed both NumPy and PyTorch so the random draws below are repeatable.
np.random.seed(454)
# Bind the return value to `_` so the cell does not display it.
_ = torch.manual_seed(454)

# Fixed parameters

In [3]:
# k sets the length of p0; torch.rand fills it with uniform draws from [0, 1).
# NOTE(review): p0 presumably parameterizes the k categories of the toy
# problem -- confirm in toy_experiment_lib.
k = 5
p0 = torch.rand(k)

In [4]:
# eta is the one-element parameter whose gradient is checked throughout
# the rest of the notebook (read back via toy_experiment.eta.grad).
eta = torch.Tensor([5.])
toy_experiment = ToyExperiment(eta, p0)

# Get true gradient 

In [5]:
# Build the full loss at eta; its gradient with respect to eta is used as
# the reference ("true") gradient for every check that follows.
# NOTE(review): get_full_loss presumably evaluates the expectation exactly,
# with no sampling -- confirm in ToyExperiment.
toy_experiment.set_parameter(eta)
full_loss = toy_experiment.get_full_loss()

In [6]:
full_loss.backward()

In [7]:
true_grad = toy_experiment.eta.grad

In [8]:
true_grad

tensor([ 2.3195])

# Integrate out REINFORCE analytically, check gradient 

In [9]:
# Same parameter value, but now through get_pm_loss with the plain REINFORCE
# estimator and topk = k.
# NOTE(review): with topk equal to the number of categories, the loss
# presumably sums over every category exactly, leaving nothing to sample --
# confirm in ToyExperiment.get_pm_loss.
toy_experiment.set_parameter(eta)
pm_loss = toy_experiment.get_pm_loss(topk = k, grad_estimator = bs_lib.reinforce)

In [10]:
pm_loss.backward()

In [11]:
toy_experiment.eta.grad

tensor([ 2.3195])

In [12]:
# why isn't this zero?
# Most likely floating-point rounding: the two losses are built along
# different computation paths, so their gradients need not round identically.
# The gap displayed for this cell (2.3842e-07) equals 2**-22, which is the
# spacing between adjacent float32 values in [2, 4) -- i.e. the two gradients
# differ by a single unit in the last place (assuming float32 tensors, the
# default produced by torch.Tensor).
np.abs(true_grad - toy_experiment.eta.grad)

tensor(1.00000e-07 *
       [ 2.3842])

In [13]:
def assert_close(x, y, tol): 
    """Assert that ``x`` and ``y`` differ by less than ``tol`` in every entry.

    Args:
        x, y: Values to compare. Torch tensors (with or without gradient
            tracking), NumPy arrays or Python scalars are accepted, as long
            as ``x - y`` is defined.
        tol: Strict upper bound on the largest absolute difference; a Python
            scalar or a single-element tensor.

    Raises:
        AssertionError: If the largest absolute entry of ``x - y`` is not
            strictly below ``tol``. The message reports both values.
    """
    diff = x - y
    if torch.is_tensor(diff):
        # Stay in torch and detach: NumPy ufuncs cannot consume tensors that
        # track gradients. Reducing with max() yields a single value, so the
        # assert below is well defined for multi-element inputs as well.
        diff = diff.detach().abs().max()
    else:
        diff = np.max(np.abs(diff))
    assert diff < tol, 'difference = {}, tolerance = {}'.format(diff, tol)

In [14]:
# The reference gradient and the one from the topk = k loss should agree up
# to floating-point rounding, hence the tight tolerance.
assert_close(true_grad, toy_experiment.eta.grad, tol = 1e-6)

# Sample and check REINFORCE 

In [15]:
n_samples = 10000

# One gradient value per Monte Carlo draw.
reinforce_grads = torch.zeros(n_samples)

for i in range(n_samples): 
    # Re-set the parameter before every draw.
    # NOTE(review): presumably this also discards the gradient left by the
    # previous backward() so values do not accumulate -- confirm in
    # ToyExperiment.set_parameter.
    toy_experiment.set_parameter(eta)
    # NOTE(review): topk = 0 presumably means no category is summed
    # analytically, i.e. the plain sampled REINFORCE estimate -- confirm.
    pm_loss = toy_experiment.get_pm_loss(topk = 0, grad_estimator = bs_lib.reinforce)
    pm_loss.backward()
    
    reinforce_grads[i] = toy_experiment.eta.grad

In [16]:
torch.mean(reinforce_grads)

tensor(2.3237)

In [17]:
torch.std(reinforce_grads) / np.sqrt(n_samples)

tensor(1.00000e-02 *
       1.3519)

In [18]:
# Unbiasedness check: the sample mean of the REINFORCE gradients should lie
# within three standard errors (std / sqrt(n_samples)) of the reference gradient.
assert_close(torch.mean(reinforce_grads), 
             true_grad, 
             3 * torch.std(reinforce_grads) / np.sqrt(n_samples))

# Check REINFORCE with simple baseline 

In [19]:
n_samples = 10000

# One gradient value per Monte Carlo draw, now with a baseline (control variate).
reinforce_cv_grads = torch.zeros(n_samples)

for i in range(n_samples): 
    # Re-set the parameter before every draw.
    # NOTE(review): presumably this also discards the gradient left by the
    # previous backward() -- confirm in ToyExperiment.set_parameter.
    toy_experiment.set_parameter(eta)
    # Same topk = 0 setting as the plain REINFORCE run; only the estimator
    # differs (reinforce_w_double_sample_baseline).
    pm_loss = toy_experiment.get_pm_loss(topk = 0, grad_estimator = bs_lib.reinforce_w_double_sample_baseline)
    pm_loss.backward()
    
    reinforce_cv_grads[i] = toy_experiment.eta.grad

In [20]:
torch.mean(reinforce_cv_grads)

tensor(2.3030)

In [21]:
torch.std(reinforce_cv_grads) / np.sqrt(n_samples)

tensor(1.00000e-02 *
       1.5539)

In [22]:
# Unbiasedness check: the sample mean of the baseline-corrected gradients
# should lie within three standard errors (std / sqrt(n_samples)) of the
# reference gradient.
assert_close(torch.mean(reinforce_cv_grads), 
             true_grad, 
             3 * torch.std(reinforce_cv_grads) / np.sqrt(n_samples))

# Check Rao-Blackwellization of simple baseline

In [23]:
n_samples = 10000

# One gradient value per Monte Carlo draw for the Rao-Blackwellized estimator.
reinforce_rb_grads = torch.zeros(n_samples)

for i in range(n_samples): 
    # Re-set the parameter before every draw.
    # NOTE(review): presumably this also discards the gradient left by the
    # previous backward() -- confirm in ToyExperiment.set_parameter.
    toy_experiment.set_parameter(eta)
    # Same baseline estimator as the topk = 0 run, but with topk = 3.
    # NOTE(review): presumably three categories are summed analytically and
    # only the remainder is sampled -- confirm in ToyExperiment.get_pm_loss.
    pm_loss = toy_experiment.get_pm_loss(topk = 3, grad_estimator = bs_lib.reinforce_w_double_sample_baseline)
    pm_loss.backward()
    
    reinforce_rb_grads[i] = toy_experiment.eta.grad

In [24]:
torch.mean(reinforce_rb_grads)

tensor(2.3238)

In [25]:
torch.std(reinforce_rb_grads) / np.sqrt(n_samples)

tensor(1.00000e-03 *
       3.3631)

In [26]:
# Unbiasedness check: the sample mean of the Rao-Blackwellized gradients
# should lie within three standard errors (std / sqrt(n_samples)) of the
# reference gradient.
assert_close(torch.mean(reinforce_rb_grads), 
             true_grad, 
             3 * torch.std(reinforce_rb_grads) / np.sqrt(n_samples))