In this notebook, we examine the variances of the gradient estimators described in bernoulli_optimization_experiments.py

In [1]:
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline  

import torch
import torch.nn as nn
import torch.optim as optim 

import itertools

from copy import deepcopy

import optimization_lib as optim_lib
import rao_blackwellization_lib as rb_lib

from toy_experiment_lib import ToyExperiment

In [2]:
# Seed both NumPy's and PyTorch's global random number generators so that the
# sampled gradient estimates below are repeatable from run to run.
np.random.seed(454)
# torch.manual_seed returns a value; binding it to `_` keeps it out of the cell output.
_ = torch.manual_seed(454)

# Fixed parameters

In [3]:
# k is the length of p0 and is later passed as `topk = k` to get_pm_loss.
k = 5
# k draws from torch.rand (uniform on [0, 1)), handed to ToyExperiment below.
# NOTE(review): presumably these act as Bernoulli probabilities -- confirm in toy_experiment_lib.
p0 = torch.rand(k)

In [4]:
# One-element tensor: the parameter value at which every gradient in this
# notebook is evaluated (it is passed to set_parameter before each backward pass).
eta = torch.Tensor([5.])
# NOTE(review): the meaning of the (eta, p0) constructor arguments is defined in
# toy_experiment_lib and is not visible from this notebook.
toy_experiment = ToyExperiment(eta, p0)

# Get true gradient 

In [5]:
# Put the experiment's parameter at eta before building the loss.
# NOTE(review): presumably this also discards any previous eta.grad -- confirm in ToyExperiment.set_parameter.
toy_experiment.set_parameter(eta)
# Loss whose gradient is used as the reference ("true") gradient below.
full_loss = toy_experiment.get_full_loss()

In [6]:
# Backpropagate; the resulting gradient is read from toy_experiment.eta.grad in the next cell.
full_loss.backward()

In [7]:
# Snapshot the reference gradient. clone() gives true_grad its own storage, so
# later backward() calls that write into eta.grad cannot silently alter the
# value every estimator is compared against.
true_grad = toy_experiment.eta.grad.clone()

In [8]:
# Display the reference gradient of the full loss with respect to eta.
true_grad

tensor(1.00000e-02 *
       [-8.1110])

# Integrate out REINFORCE analytically, check gradient 

In [9]:
# Put the parameter back at eta before building a new loss.
toy_experiment.set_parameter(eta)
# topk = k (all k entries) and no baseline: per the section title, this is the
# case where the REINFORCE part is integrated out analytically.
# NOTE(review): exact topk semantics live in ToyExperiment.get_pm_loss -- confirm there.
pm_loss = toy_experiment.get_pm_loss(topk = k, use_baseline = False)

In [10]:
# Backpropagate; the resulting gradient is read from toy_experiment.eta.grad in the next cell.
pm_loss.backward()

In [11]:
# Display the gradient of pm_loss with respect to eta for comparison with true_grad.
toy_experiment.eta.grad

tensor(1.00000e-02 *
       [-8.1110])

In [12]:
# Absolute gap between the reference gradient and the gradient of the
# analytically integrated loss (topk = k).
# NOTE(review): this cell used to ask "why isn't this zero?"; the saved output
# is tensor([ 0.]), so the question looks stale -- confirm and drop if so.
np.abs(true_grad - toy_experiment.eta.grad)

tensor([ 0.])

In [13]:
def assert_close(x, y, tol):
    """Assert that x and y agree to within tol in every element.

    Parameters
    ----------
    x, y : scalar, numpy array or torch tensor
        Values to compare. They must be non-empty and broadcast against
        each other.
    tol : scalar, or one-element numpy array / torch tensor
        Strict upper bound on the largest absolute difference.

    Raises
    ------
    AssertionError
        If max |x - y| is not strictly below tol (this includes the case
        where the difference is NaN).
    """
    def _as_array(value):
        # Tensors are detached and moved to host memory first, so values that
        # are tracked by autograd can be compared as well.
        if hasattr(value, 'detach'):
            value = value.detach().cpu().numpy()
        return np.asarray(value, dtype = float)

    # Reduce to the single worst element: asserting on a multi-element
    # comparison has an ambiguous truth value and raises instead of checking.
    diff = float(np.abs(_as_array(x) - _as_array(y)).max())
    tol = _as_array(tol).item()

    assert diff < tol, 'difference = {}, tolerance = {}'.format(diff, tol)

In [14]:
# The analytically integrated gradient must match the reference gradient up to
# a small absolute tolerance; nothing is sampled here, so no statistical slack is used.
assert_close(true_grad, toy_experiment.eta.grad, tol = 1e-6)

# Sample and check REINFORCE 

In [15]:
# Number of gradient draws to collect.
n_samples = 10000

# One slot per draw.
reinforce_grads = torch.zeros(n_samples)

for i in range(n_samples): 
    # Put the parameter back at eta before every draw.
    # NOTE(review): presumably this also resets eta.grad so that draws do not
    # accumulate across iterations -- confirm in ToyExperiment.set_parameter.
    toy_experiment.set_parameter(eta)
    # topk = 0 and no baseline: per the section title, the plain REINFORCE estimator.
    pm_loss = toy_experiment.get_pm_loss(topk = 0, use_baseline = False)
    pm_loss.backward()
    
    # Record this draw's gradient with respect to eta.
    reinforce_grads[i] = toy_experiment.eta.grad

In [16]:
# Sample mean of the REINFORCE gradient draws (to be compared with true_grad).
reinforce_grads.mean()

tensor(1.00000e-02 *
       -7.5612)

In [17]:
# Standard error of the sample mean: sample standard deviation over sqrt(n_samples).
reinforce_grads.std() / np.sqrt(n_samples)

tensor(1.00000e-03 *
       7.8070)

In [18]:
# The REINFORCE sample mean must lie within three standard errors of the
# reference gradient.
assert_close(
    reinforce_grads.mean(),
    true_grad,
    tol = 3 * reinforce_grads.std() / np.sqrt(n_samples),
)

# Check REINFORCE with baseline 

In [19]:
# Number of gradient draws to collect (same value as in the other sampling cells).
n_samples = 10000

# One slot per draw.
reinforce_cv_grads = torch.zeros(n_samples)

for i in range(n_samples): 
    # Put the parameter back at eta before every draw.
    # NOTE(review): presumably this also resets eta.grad so that draws do not
    # accumulate across iterations -- confirm in ToyExperiment.set_parameter.
    toy_experiment.set_parameter(eta)
    # topk = 0 with use_baseline = True: per the section title, REINFORCE with a baseline.
    pm_loss = toy_experiment.get_pm_loss(topk = 0, use_baseline = True)
    pm_loss.backward()
    
    # Record this draw's gradient with respect to eta.
    reinforce_cv_grads[i] = toy_experiment.eta.grad

In [20]:
# Sample mean of the baseline-corrected gradient draws (to be compared with true_grad).
reinforce_cv_grads.mean()

tensor(1.00000e-02 *
       -8.7544)

In [21]:
# Standard error of the sample mean: sample standard deviation over sqrt(n_samples).
reinforce_cv_grads.std() / np.sqrt(n_samples)

tensor(1.00000e-03 *
       7.3442)

In [22]:
# The baseline-corrected sample mean must lie within three standard errors of
# the reference gradient.
assert_close(
    reinforce_cv_grads.mean(),
    true_grad,
    tol = 3 * reinforce_cv_grads.std() / np.sqrt(n_samples),
)

# Check Rao-Blackwellization 

In [23]:
# Number of gradient draws to collect (same value as in the other sampling cells).
n_samples = 10000

# One slot per draw.
reinforce_rb_grads = torch.zeros(n_samples)

for i in range(n_samples): 
    # Put the parameter back at eta before every draw.
    # NOTE(review): presumably this also resets eta.grad so that draws do not
    # accumulate across iterations -- confirm in ToyExperiment.set_parameter.
    toy_experiment.set_parameter(eta)
    # topk = 3 (out of k = 5) with a baseline: the Rao-Blackwellized estimator of this section.
    # NOTE(review): presumably the top 3 entries are summed analytically and the
    # remainder is sampled -- confirm in ToyExperiment.get_pm_loss.
    pm_loss = toy_experiment.get_pm_loss(topk = 3, use_baseline = True)
    pm_loss.backward()
    
    # Record this draw's gradient with respect to eta.
    reinforce_rb_grads[i] = toy_experiment.eta.grad

In [24]:
# Sample mean of the Rao-Blackwellized gradient draws (to be compared with true_grad).
reinforce_rb_grads.mean()

tensor(1.00000e-02 *
       -7.9263)

In [25]:
# Standard error of the sample mean: sample standard deviation over sqrt(n_samples).
reinforce_rb_grads.std() / np.sqrt(n_samples)

tensor(1.00000e-03 *
       1.0559)

In [26]:
# The Rao-Blackwellized sample mean must lie within three standard errors of
# the reference gradient.
assert_close(
    reinforce_rb_grads.mean(),
    true_grad,
    tol = 3 * reinforce_rb_grads.std() / np.sqrt(n_samples),
)