In this notebook, we examine the variances of the gradient estimators described in bernoulli_optimization_experiments.py, and check that each estimator agrees with the true gradient.

In [1]:
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline  

import torch
import torch.nn as nn
import torch.optim as optim 

import itertools

from copy import deepcopy

import optimization_lib as optim_lib
import rao_blackwellization_lib as rb_lib

In [2]:
# Fix the PyTorch and NumPy global RNGs so the random draws below are repeatable.
# The two seeding calls are independent of each other.
_ = torch.manual_seed(454)  # bind the returned Generator so the cell prints nothing
np.random.seed(454)

# Set up experiment class

In [3]:
class ToyExperiment(object):
    """Toy problem with a categorical variable z over k classes and a parameter eta.

    The per-category objective is f(z) = (z - eta) ** 2 and the categorical
    log-probabilities are log_softmax(eta * p0).
    """

    def __init__(self, eta, p0):
        """Store the fixed vector p0 and an independent, differentiable copy of eta.

        Args:
            eta: tensor holding the initial parameter value.
            p0: 1-d tensor; its length defines the number of categories.
        """
        # number of categories
        self.k = len(p0)

        self.p0 = p0

        # the parameter
        self.set_parameter(eta)

        # normalizes eta * p0 into log-probabilities over the k categories
        self.log_softmax = torch.nn.LogSoftmax(dim = 0)

    def set_parameter(self, eta):
        """Rebind self.eta to a fresh deep copy of eta that requires grad.

        Because the copy is a new tensor, the tensor previously stored in
        self.eta (and its .grad) is left untouched.
        """
        self.eta = deepcopy(eta)
        self.eta.requires_grad_(True)

    def get_log_q(self):
        """Return log_softmax(eta * p0) as a (1, k) tensor detached from the graph.

        NOTE(review): because of the .detach(), no gradient reaches eta through
        the class weights; only the dependence of f(z) on eta is differentiated.
        Confirm this is intended for the estimators being checked.
        """
        return self.log_softmax(self.eta * self.p0).view(1, self.k).detach()

    def get_f_z(self, z):
        """Return (z - eta) ** 2 for a category index z.

        Args:
            z: a Python/NumPy number or a tensor of category indices.

        The original guard was `isinstance(5, int)`, which is always true and
        therefore wrapped every input (tensors included) in torch.Tensor([z]).
        Now only non-tensor inputs are wrapped; 0-dim tensors are reshaped to
        shape (1,) so the result shape matches what the wrapping used to give.
        """
        if not torch.is_tensor(z):
            z = torch.Tensor([z])
        elif z.dim() == 0:
            z = z.reshape(1)

        return (z.float() - self.eta) ** 2

    def get_pm_loss(self, topk = 0, use_baseline = False):
        """Build the surrogate loss from rb_lib.get_raoblackwell_ps_loss.

        Args:
            topk: forwarded to rb_lib; presumably the number of categories
                handled analytically rather than sampled -- TODO confirm.
            use_baseline: forwarded to rb_lib; presumably toggles a
                control-variate baseline -- TODO confirm.
        """
        log_class_weights = self.get_log_q()
        return rb_lib.get_raoblackwell_ps_loss(self.get_f_z, log_class_weights, topk,
                                use_baseline)

    def get_full_loss(self):
        """Build the loss from rb_lib.get_full_loss using the class probabilities exp(log q)."""
        log_class_weights = self.get_log_q()
        class_weights = torch.exp(log_class_weights)

        return rb_lib.get_full_loss(self.get_f_z, class_weights)

# Fixed parameters

In [4]:
# Number of categories of the toy categorical variable.
k = 5
# One uniform [0, 1) draw per category; ToyExperiment.get_log_q multiplies
# these by eta before applying the log-softmax.
p0 = torch.rand((k,))

In [5]:
# Parameter value at which every gradient estimator below is evaluated.
eta = torch.Tensor([5.])
# The experiment keeps its own differentiable copy of eta (see set_parameter).
toy_experiment = ToyExperiment(eta = eta, p0 = p0)

# Get true gradient 

In [6]:
# Rebind the experiment's eta to a fresh copy so its .grad starts out empty,
# then build the loss returned by rb_lib.get_full_loss over the class probabilities.
toy_experiment.set_parameter(eta)
full_loss = toy_experiment.get_full_loss()

In [7]:
# Backpropagate; this populates toy_experiment.eta.grad.
full_loss.backward()

In [8]:
# Keep a handle on the reference gradient. Later set_parameter calls rebind
# toy_experiment.eta to a new tensor, so this one is not overwritten by
# subsequent backward() calls.
true_grad = toy_experiment.eta.grad

In [9]:
# Display the reference gradient.
true_grad

tensor([ 2.9941])

# Integrate out REINFORCE analytically, check gradient 

In [10]:
# Fresh copy of eta, then the rb_lib surrogate loss with topk = k and no baseline.
# Presumably topk = k handles all k categories analytically so nothing is
# sampled -- TODO confirm against rb_lib.
toy_experiment.set_parameter(eta)
pm_loss = toy_experiment.get_pm_loss(topk = k, use_baseline = False)

In [13]:
# Backpropagate the surrogate loss into the current toy_experiment.eta.
pm_loss.backward()

In [14]:
# Display the gradient of the surrogate loss; it should match true_grad.
toy_experiment.eta.grad

tensor([ 2.9941])

In [15]:
# The gap is not exactly zero because the gradients are float32: the recorded
# value 2.3842e-07 equals 2**-22, i.e. one float32 ULP for numbers in [2, 4).
# Presumably the two losses accumulate the same terms in a different order,
# so they agree only up to rounding.
np.abs(true_grad - toy_experiment.eta.grad)

tensor(1.00000e-07 *
       [ 2.3842])

In [16]:
def assert_close(x, y, tol):
    """Assert that x and y differ by less than tol, element-wise.

    Args:
        x, y: numbers, NumPy arrays or torch tensors of compatible shape.
        tol: strict upper bound on |x - y| (number, array or tensor).

    Raises:
        AssertionError: if any element of |x - y| is not below tol; the
            message reports the difference and the tolerance.
    """
    # Builtin abs dispatches to __abs__, so it also works for tensors that
    # require grad (np.abs would have to convert those to NumPy and fail).
    diff = abs(x - y)
    within_tol = diff < tol
    # Arrays and tensors give an element-wise result; reduce it so inputs
    # with more than one element do not raise an ambiguous-truth-value error.
    if hasattr(within_tol, 'all'):
        within_tol = bool(within_tol.all())
    assert within_tol, 'difference = {}, tolerance = {}'.format(diff, tol)

In [17]:
# With nothing sampled the two gradients should agree up to float32 rounding,
# so a tight absolute tolerance is used.
assert_close(true_grad, toy_experiment.eta.grad, tol = 1e-6)

# Sample and check REINFORCE 

In [18]:
# Monte Carlo draws of the gradient estimate with topk = 0 and no baseline
# (presumably plain REINFORCE -- TODO confirm against rb_lib).
n_samples = 10000
reinforce_grads = torch.zeros(n_samples)

for sample_idx in range(n_samples):
    # Fresh copy of eta on every draw so .grad holds only this draw's gradient.
    toy_experiment.set_parameter(eta)
    pm_loss = toy_experiment.get_pm_loss(use_baseline = False, topk = 0)
    pm_loss.backward()

    grad_estimate = toy_experiment.eta.grad
    reinforce_grads[sample_idx] = grad_estimate

In [19]:
# Sample mean of the gradient estimates; should be close to true_grad.
reinforce_grads.mean()

tensor(2.9788)

In [20]:
# Standard error of the sample mean: sample std divided by sqrt(n_samples).
reinforce_grads.std() / np.sqrt(n_samples)

tensor(1.00000e-02 *
       1.6699)

In [21]:
# The sample mean must lie within three standard errors of the true gradient.
assert_close(reinforce_grads.mean(),
             true_grad,
             tol = 3 * reinforce_grads.std() / np.sqrt(n_samples))

# Check REINFORCE with baseline 

In [22]:
# Monte Carlo draws of the gradient estimate with topk = 0 and the baseline
# switched on (presumably REINFORCE with a control variate -- TODO confirm).
n_samples = 10000
reinforce_cv_grads = torch.zeros(n_samples)

for sample_idx in range(n_samples):
    # Fresh copy of eta on every draw so .grad holds only this draw's gradient.
    toy_experiment.set_parameter(eta)
    pm_loss = toy_experiment.get_pm_loss(use_baseline = True, topk = 0)
    pm_loss.backward()

    grad_estimate = toy_experiment.eta.grad
    reinforce_cv_grads[sample_idx] = grad_estimate

In [23]:
# Sample mean of the baseline-corrected estimates; should be close to true_grad.
reinforce_cv_grads.mean()

tensor(2.9974)

In [24]:
# Standard error of the sample mean: sample std divided by sqrt(n_samples).
reinforce_cv_grads.std() / np.sqrt(n_samples)

tensor(1.00000e-02 *
       1.6955)

In [25]:
# The sample mean must lie within three standard errors of the true gradient.
assert_close(reinforce_cv_grads.mean(),
             true_grad,
             tol = 3 * reinforce_cv_grads.std() / np.sqrt(n_samples))

# Check Rao-Blackwellization

In [26]:
# Monte Carlo draws of the gradient estimate with topk = 3 and the baseline
# switched on (presumably three categories are handled analytically and the
# remainder sampled -- TODO confirm against rb_lib).
n_samples = 10000
reinforce_rb_grads = torch.zeros(n_samples)

for sample_idx in range(n_samples):
    # Fresh copy of eta on every draw so .grad holds only this draw's gradient.
    toy_experiment.set_parameter(eta)
    pm_loss = toy_experiment.get_pm_loss(use_baseline = True, topk = 3)
    pm_loss.backward()

    grad_estimate = toy_experiment.eta.grad
    reinforce_rb_grads[sample_idx] = grad_estimate

In [27]:
# Sample mean of the topk = 3 estimates; should be close to true_grad.
reinforce_rb_grads.mean()

tensor(2.9936)

In [28]:
# Standard error of the sample mean: sample std divided by sqrt(n_samples).
reinforce_rb_grads.std() / np.sqrt(n_samples)

tensor(1.00000e-04 *
       2.9131)

In [29]:
# The sample mean must lie within three standard errors of the true gradient.
# NOTE(review): the AssertionError recorded under this cell reports a tolerance
# of 2.9131e-04, which is the one-sigma standard error printed by the previous
# cell rather than three times it. The traceback therefore looks stale (from a
# run before the factor 3 was added); re-run the notebook to refresh it.
assert_close(reinforce_rb_grads.mean(),
             true_grad,
             tol = 3 * reinforce_rb_grads.std() / np.sqrt(n_samples))

AssertionError: difference = tensor(1.00000e-04 *
       [ 5.3406]), tolerance = 0.00029131374321877956