In [2]:
import torch
import numpy as np


In [3]:
# Small constant added to the standard deviation to avoid division by zero.
EPS = 1e-8

def gaussian_likelihood_solu(x, mu, log_std):
    """
    Reference log-likelihood of `x` under a diagonal Gaussian.

    Args:
        x: Tensor of samples.
        mu: Tensor of means, broadcastable against `x`.
        log_std: Tensor of log standard deviations, broadcastable against `x`.

    Returns:
        Tensor of per-sample log-likelihoods, i.e. the per-dimension
        log-densities summed over the last axis.
    """
    std = torch.exp(log_std) + EPS
    standardized = (x - mu) / std
    per_dim = -0.5 * (standardized**2 + 2*log_std + np.log(2*np.pi))
    return per_dim.sum(axis=-1)

In [4]:
def print_result(correct=False):
    """
    Print a banner reporting whether the submitted answer was correct.

    Args:
        correct: Truthy if the answer passed the check; defaults to False.
    """
    rule = '=' * 50
    if correct:
        verdict = "Congratulations! Your answer is correct."
    else:
        verdict = "Your answer appears to be incorrect. Try again!"

    # Three separate prints: padded top rule, the verdict, padded bottom rule.
    print('\n' * 5 + rule + '\n' * 3)
    print(verdict)
    print('\n' * 3 + rule)

In [25]:
def gaussian_likelihood(x, mu, log_std):
    """
    Log-likelihood of `x` under a diagonal Gaussian with mean `mu` and
    log standard deviation `log_std`.

    Args:
        x: Tensor with shape [batch, dim]
        mu: Tensor with shape [batch, dim]
        log_std: Tensor with shape [batch, dim] or [dim]

    Returns:
        Tensor with shape [batch]
    """
    # Use torch.exp rather than np.exp: NumPy cannot convert a tensor that
    # requires grad (e.g. a trainable log_std Parameter), and the computation
    # must stay inside the autograd graph for gradients to reach log_std.
    variance = torch.exp(log_std) ** 2
    per_dim = -0.5 * ((x - mu) ** 2 / variance + 2 * log_std + np.log(2 * np.pi))
    # Dimensions are independent, so the joint log-density is the sum over dim.
    return per_dim.sum(axis=-1)


if __name__ == '__main__':
    # Run this file to verify your solution: the implementation above is
    # compared against the reference solution on random inputs.
    n_batch, n_dim = 32, 10

    x = torch.rand(n_batch, n_dim)
    mu = torch.rand(n_batch, n_dim)
    log_std = torch.rand(n_dim)

    candidate = gaussian_likelihood(x, mu, log_std).detach().numpy()
    reference = gaussian_likelihood_solu(x, mu, log_std).detach().numpy()

    print_result(np.allclose(candidate, reference))









Congratulations! Your answer is correct.





In [23]:
# Scratch cell: check how the likelihood terms broadcast and reduce on tiny tensors.
# batch = 2
# dim = 3
a = torch.tensor([[1,2,3], 
                  [1,2,3]])
mu = torch.tensor([[4,5,6], 
                  [1,2,3]])
# One value per dim; it is combined with the [2, 3] tensors below.
log_sigma = torch.tensor([1,2,2])
print(a[0])
print(a-mu)
# NOTE: `a` is rebound here, and the division uses log_sigma**2 directly
# (not exp(log_sigma)**2), so this only mirrors the structure of the formula.
a = (a-mu)**2/log_sigma**2
print(a)    
p = a + 2*log_sigma
print(p)
# Reduce over the last axis, leaving one value per row.
print(p.sum(axis=-1)) 

tensor([1, 2, 3])
tensor([[-3, -3, -3],
        [ 0,  0,  0]])
tensor([[9.0000, 2.2500, 2.2500],
        [0.0000, 0.0000, 0.0000]])
tensor([[11.0000,  6.2500,  6.2500],
        [ 2.0000,  4.0000,  4.0000]])
tensor([23.5000, 10.0000])


In [None]:
import torch
import torch.nn as nn
import numpy as np
from spinup.exercises.pytorch.problem_set_1 import exercise1_1
from spinup.exercises.pytorch.problem_set_1 import exercise1_2_auxiliary

"""

Exercise 1.2: PPO Gaussian Policy

You will implement an MLP diagonal Gaussian policy for PPO by
writing an MLP-builder, and a few other key functions.

Log-likelihoods will be computed using your answer to Exercise 1.1,
so make sure to complete that exercise before beginning this one.

"""

def mlp(sizes, activation, output_activation=nn.Identity):
    """
    Build a multi-layer perceptron in PyTorch.

    Args:
        sizes: Tuple, list, or other iterable giving the number of units
            for each layer of the MLP.

        activation: Activation function (module class) for all layers
            except last.

        output_activation: Activation function (module class) for last layer.

    Returns:
        A PyTorch module that can be called to give the output of the MLP:
        an nn.Sequential alternating nn.Linear layers and activation modules.

    """
    # Materialize once so any iterable (not only indexable sequences) works.
    sizes = list(sizes)
    last = len(sizes) - 2
    layers = []
    for j, (n_in, n_out) in enumerate(zip(sizes[:-1], sizes[1:])):
        act = activation if j < last else output_activation
        # The activation is its own module placed after the Linear layer.
        # Passing it as nn.Linear's third positional argument would set
        # `bias` instead and silently drop the non-linearity.
        layers += [nn.Linear(n_in, n_out), act()]
    return nn.Sequential(*layers)

class DiagonalGaussianDistribution:
    """Diagonal Gaussian parameterized by a mean and a log standard deviation."""

    def __init__(self, mu, log_std):
        """
        Args:
            mu: Tensor of means.
            log_std: Tensor of log standard deviations, broadcastable to `mu`.
        """
        self.mu = mu
        self.log_std = log_std

    def sample(self):
        """
        Returns:
            A PyTorch Tensor of samples from the diagonal Gaussian distribution with
            mean and log_std given by self.mu and self.log_std.
        """
        # mu + std * standard-normal noise. The scale must come from
        # self.log_std (self.log_prob is a method, not a tensor).
        noise = torch.randn_like(self.mu)
        return self.mu + torch.exp(self.log_std) * noise

    #================================(Given, ignore)==========================================#
    def log_prob(self, value):
        """Log-likelihood of `value`, delegated to exercise1_1.gaussian_likelihood."""
        return exercise1_1.gaussian_likelihood(value, self.mu, self.log_std)

    def entropy(self):
        """Return 0.5 + 0.5*log(2*pi) plus log_std summed over the last axis."""
        # NOTE(review): the constant term is added once, not once per
        # dimension - confirm this is intended (this is the given code).
        return 0.5 + 0.5 * np.log(2 * np.pi) + self.log_std.sum(axis=-1)
    #=========================================================================================#


class MLPGaussianActor(nn.Module):
    """Actor that builds a DiagonalGaussianDistribution from an MLP mean and a learned log_std."""

    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
        """
        Initialize an MLP Gaussian Actor by making a PyTorch module for computing the
        mean of the distribution given a batch of observations, and a log_std parameter.

        log_std is a trainable PyTorch Parameter with the same shape as the action
        vector, independent of observations, initialized to [-0.5, -0.5, ..., -0.5].
        """
        super().__init__()
        # Register log_std before the mean network, keeping parameter order stable.
        initial_log_std = np.full(act_dim, -0.5, dtype=np.float32)
        self.log_std = torch.nn.Parameter(torch.as_tensor(initial_log_std))
        layer_sizes = [obs_dim, *hidden_sizes, act_dim]
        self.mu_net = mlp(layer_sizes, activation)

    #================================(Given, ignore)==========================================#
    def forward(self, obs, act=None):
        """
        Build the action distribution for `obs`.

        Returns:
            (pi, logp_a): the distribution, and the log-probability of `act`
            under it, or None when `act` is not given.
        """
        pi = DiagonalGaussianDistribution(self.mu_net(obs), self.log_std)
        logp_a = pi.log_prob(act) if act is not None else None
        return pi, logp_a
    #=========================================================================================#



if __name__ == '__main__':
    # Run this file to verify your solution: trains PPO with the actor defined
    # above and checks the average episode return over the last epochs.
    from spinup import ppo_pytorch as ppo
    from functools import partial
    import gym  # required by env_fn below; it was used without being imported
    import os
    import pandas as pd
    import psutil
    import time

    # Timestamped output directory so successive runs do not overwrite each other.
    logdir = "/tmp/experiments/%i"%int(time.time())

    ActorCritic = partial(exercise1_2_auxiliary.ExerciseActorCritic, actor=MLPGaussianActor)

    ppo(env_fn = lambda : gym.make('InvertedPendulum-v2'),
        actor_critic=ActorCritic,
        ac_kwargs=dict(hidden_sizes=(64,)),
        steps_per_epoch=4000, epochs=20, logger_kwargs=dict(output_dir=logdir))

    # Get scores from last five epochs to evaluate success.
    data = pd.read_table(os.path.join(logdir,'progress.txt'))
    last_scores = data['AverageEpRet'][-5:]

    # Your implementation is probably correct if the agent has a score >500,
    # or if it reaches the top possible score of 1000, in the last five epochs.
    correct = np.mean(last_scores) > 500 or np.max(last_scores)==1e3
    # print_result is the helper defined in an earlier cell of this notebook.
    print_result(correct)

In [33]:
# Scratch cell: check how the layer-size list is assembled and how many
# iterations a `range(len(sizes)-1)` loop performs.
obs_dim = 5
hidden_sizes = (2,3)
act_dim = 2
# Input size, then the hidden sizes, then the output size.
sizes = [obs_dim] + list(hidden_sizes) + [act_dim]
print(sizes)
print(len(sizes))

# Collect the loop indices only: one iteration per adjacent pair in `sizes`.
layers = []
for j in range(len(sizes)-1):
    layers += [j]
print(layers)

[5, 2, 3, 2]
4
[0, 1, 2]
