In [610]:
import numpy as np

class OUNoise(object):
    """Ornstein-Uhlenbeck exploration noise with a time-scheduled noise scale.

    The internal state follows ``x <- x + theta * (mu - x) + sigma * N(0, I)``.
    ``sigma`` is interpolated linearly from ``max_sigma`` (at ``t = 0``) down to
    ``min_sigma`` (at ``t >= decay_period``).

    Args:
        action_space: Number of noise dimensions (stored as ``action_dim``).
        mu: Value the state is pulled towards.
        theta: Strength of the pull towards ``mu``.
        max_sigma: Noise scale at the start of the schedule.
        min_sigma: Noise scale at (and after) the end of the schedule.
        decay_period: Value of ``t`` at which ``sigma`` reaches ``min_sigma``.
    """

    def __init__(self, action_space, mu=0.0, theta=0.15, max_sigma=0.7, min_sigma=0.4, decay_period=600_000):
        self.mu = mu
        self.theta = theta
        self.sigma = max_sigma
        self.max_sigma = max_sigma
        self.min_sigma = min_sigma
        self.decay_period = decay_period
        self.action_dim = action_space
        self.reset()

    def reset(self):
        """Reset the process state to ``mu`` in every dimension."""
        self.state = np.ones(self.action_dim) * self.mu

    def evolve_state(self):
        """Advance the process by one step and return the new state array."""
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * np.random.randn(self.action_dim)
        self.state = x + dx
        return self.state

    def get_noise(self, t=0):
        """Advance the process one step, then update ``sigma`` for step ``t``.

        The state is evolved with the sigma that was in effect *before* this
        call; the sigma computed here is used by the next call.

        Args:
            t: Current time step used to position ``sigma`` on its schedule.

        Returns:
            The new process state (array of length ``action_dim``).
        """
        ou_state = self.evolve_state()
        # Fraction of the schedule completed, clamped to [0, 1].
        progress = min(1.0, max(0.0, float(t) / self.decay_period))
        # Always interpolate from max_sigma. Subtracting from the current
        # self.sigma would compound the decay on every call, making sigma
        # depend on how often get_noise was called rather than on t alone.
        scheduled = self.max_sigma - (self.max_sigma - self.min_sigma) * progress
        self.sigma = max(scheduled, self.min_sigma)
        print('sigma:', self.sigma, 'state:', ou_state)
        return ou_state

In [611]:
# Two-dimensional noise process: sigma starts at 0.9 and is never allowed
# below 0.1; decay_period=500_000 sets the time scale of the decay.
noise = OUNoise(action_space=2, max_sigma=0.9, min_sigma=0.1, decay_period=500_000)

In [612]:
# Draw one sample at time step t = 6000; the returned array is the cell's value.
noise.get_noise(t=6_000)

sigma: 0.8904 state: [0.23640066 1.72709601]


array([0.23640066, 1.72709601])

In [613]:
from env_utils import GoalManager

In [614]:
# Build the goal manager from env_utils with its default arguments.
# NOTE(review): the recorded output shows obstacle names and polygon coordinates
# being printed during construction — presumably it loads the obstacle map; confirm.
GM = GoalManager()

[92mObstacle name: wall_outler, base pose: (0.0, 0.0, 0.0)[0m
Coordinates: [[[11.5, -8.425], [11.5, -11.575], [-11.5, -11.575], [-11.5, -8.425]], [[21.5, 1.575], [21.5, -1.575], [-1.5, -1.575], [-1.5, 1.575]], [[11.5, 11.575], [11.5, 8.425], [-11.5, 8.425], [-11.5, 11.575]], [[1.5, 1.575], [1.5, -1.575], [-21.5, -1.575], [-21.5, 1.575]]]



In [615]:
import torch

# Two independent leaf tensors holding the same random values.
v = torch.rand(5) * 1000
# Clone before v starts tracking gradients, so v_1 is a leaf of its own
# rather than a node in v's graph.
v_1 = v.clone().requires_grad_(True)
v.requires_grad_(True)

# loss = 0.5 * (|v_1|^2 + |v|^2), so d(loss)/dv = v and d(loss)/dv_1 = v_1.
loss = 0.5 * (v_1 * v_1 + v * v).sum()
loss.backward()

# Rescale only v_1's gradient so that its L2 norm is at most 1.
torch.nn.utils.clip_grad_norm_([v_1], max_norm=1.0, norm_type=2)

print(v.grad)
print(v_1.grad)
# Reference: v's (unclipped) gradient normalised by hand to unit L2 norm.
print(v.grad / v.grad.norm(p=2))

tensor([574.9388, 754.5155, 960.6138, 782.7532, 402.9149])
tensor([0.3567, 0.4681, 0.5960, 0.4857, 0.2500])
tensor([0.3567, 0.4681, 0.5960, 0.4857, 0.2500])


In [616]:
# v_2 is cloned from v_1. If v_1 tracks gradients (as set up in the previous
# cell), v_2 and v_3 are intermediate graph nodes rather than leaves.
v_2 = v_1.clone()
v_3 = v_2.tanh()

v_2.requires_grad_(True)
v_3.requires_grad_(True)

# Intermediate tensors only keep .grad after backward if asked to beforehand.
v_2.retain_grad()
v_3.retain_grad()

loss2 = 0.5 * (v_2 * v_2 + v_3 * v_3).sum()

# NOTE(review): because v_2 derives from v_1, this backward pass also
# accumulates into v_1.grad — confirm that side effect is intended.
loss2.backward()

print(v_2.grad)
print(v_3.grad)

tensor([574.9388, 754.5155, 960.6138, 782.7532, 402.9149])
tensor([1., 1., 1., 1., 1.])


In [617]:
import torch.nn as nn

# Use the default CUDA device when PyTorch can see a GPU, otherwise the CPU.
# (A preceding "cuda:0" assignment was removed: it was overwritten immediately.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Actor(torch.nn.Module):
    """Policy network: three ReLU hidden layers followed by a tanh output layer.

    Args:
        name: Label stored on the module as ``self.name``.
        state_size: Number of input features.
        action_size: Number of output features.
        hidden_size: Width of each hidden layer.
    """

    def __init__(self, name, state_size, action_size, hidden_size):
        super().__init__()
        self.name = name
        self.iteration = 0

        # Fully connected stack: state -> hidden -> hidden -> hidden -> action.
        self.fa1 = nn.Linear(state_size, hidden_size)
        self.fa2 = nn.Linear(hidden_size, hidden_size)
        self.fa3 = nn.Linear(hidden_size, hidden_size)
        self.fa4 = nn.Linear(hidden_size, action_size)

        # Debug aid: report every registered parameter that is a leaf tensor.
        # self.name is already stored above, so reusing `name` as the loop
        # variable does not affect it.
        for name, param in self.named_parameters():
            if not param.is_leaf:
                continue
            print(f"Parameter {name} is a leaf tensor.")

        # Re-initialise every Linear sub-module (see init_weights).
        self.apply(self.init_weights)

    def init_weights(self, m: torch.nn.Module):
        """Kaiming-normal weights and zero biases for Linear layers; other modules are untouched."""
        if not isinstance(m, torch.nn.Linear):
            return
        torch.nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
        torch.nn.init.zeros_(m.bias)

    def forward(self, states, visualize=False):
        """Map ``states`` to actions squashed into [-1, 1] by tanh.

        ``visualize`` is accepted but not used by this implementation.
        """
        hidden = states
        for layer in (self.fa1, self.fa2, self.fa3):
            hidden = torch.relu(layer(hidden))
        return torch.tanh(self.fa4(hidden))
    
class Critic(torch.nn.Module):
    """Twin Q-network: two independent MLPs scoring (state, action) pairs.

    Each head takes the concatenation of state and action and outputs a single
    value per sample.

    Args:
        name: Label stored on the module as ``self.name``.
        state_size: Number of state features.
        action_size: Number of action features (concatenated to the state as input).
        hidden_size: Width of each hidden layer.
    """

    def __init__(self,
                 name,
                 state_size,
                 action_size,
                 hidden_size,
                 ):
        super(Critic, self).__init__()
        self.name = name
        self.iteration = 0

        # Q1 Architecture
        self.l01 = nn.Linear(state_size + action_size, hidden_size)
        self.l02 = nn.Linear(hidden_size, hidden_size)
        self.l03 = nn.Linear(hidden_size, hidden_size)
        self.l04 = nn.Linear(hidden_size, 1)

        # Q2 Architecture
        self.l11 = nn.Linear(state_size + action_size, hidden_size)
        self.l12 = nn.Linear(hidden_size, hidden_size)
        self.l13 = nn.Linear(hidden_size, hidden_size)
        self.l14 = nn.Linear(hidden_size, 1)

        # Debug aid: report every registered parameter that is a leaf tensor.
        for name, param in self.named_parameters():
            if param.is_leaf:
                print(f"Parameter {name} is a leaf tensor.")

        # Re-initialise every Linear sub-module (see init_weights).
        self.apply(self.init_weights)

    def init_weights(self, m: torch.nn.Module):
        """Kaiming-normal weights and zero biases for Linear layers; other modules are untouched."""
        if isinstance(m, torch.nn.Linear):
            torch.nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
            m.bias.data.fill_(0.0)

    def _q1(self, sa: torch.Tensor) -> torch.Tensor:
        # First Q head applied to an already concatenated (state, action) batch.
        x = torch.relu(self.l01(sa))
        x = torch.relu(self.l02(x))
        x = torch.relu(self.l03(x))
        return self.l04(x)

    def _q2(self, sa: torch.Tensor) -> torch.Tensor:
        # Second Q head applied to an already concatenated (state, action) batch.
        x = torch.relu(self.l11(sa))
        x = torch.relu(self.l12(x))
        x = torch.relu(self.l13(x))
        return self.l14(x)

    # The return annotation is quoted so it is never evaluated at definition
    # time; the method returns a pair of tensors, not a single tensor.
    def forward(self, states, actions, visualize=False) -> "tuple[torch.Tensor, torch.Tensor]":
        """Evaluate both Q heads.

        Args:
            states: Batch of states; concatenated with ``actions`` along dim 1.
            actions: Batch of actions.
            visualize: Accepted but not used by this implementation.

        Returns:
            ``(q1, q2)``: the outputs of the two heads.
        """
        sa = torch.cat((states, actions), dim=1)
        return self._q1(sa), self._q2(sa)

    def Q1_forward(self, states, actions) -> torch.Tensor:
        """Evaluate only the first Q head; equals ``forward(...)[0]``."""
        sa = torch.cat((states, actions), dim=1)
        return self._q1(sa)

In [618]:
# Problem dimensions shared by both networks.
state_size, action_size = 10, 2

# Build the actor first, then the critic, and move each onto the chosen device.
actor = Actor(name='actor', state_size=state_size, action_size=action_size, hidden_size=256).to(device)
critic = Critic(name='critic', state_size=state_size, action_size=action_size, hidden_size=256).to(device)

# The optimizer only manages the actor's parameters.
actor_optimizer = torch.optim.AdamW(actor.parameters(), lr=1e-4)

Parameter l01.weight is a leaf tensor.
Parameter l01.bias is a leaf tensor.
Parameter l02.weight is a leaf tensor.
Parameter l02.bias is a leaf tensor.
Parameter l03.weight is a leaf tensor.
Parameter l03.bias is a leaf tensor.
Parameter l04.weight is a leaf tensor.
Parameter l04.bias is a leaf tensor.
Parameter l11.weight is a leaf tensor.
Parameter l11.bias is a leaf tensor.
Parameter l12.weight is a leaf tensor.
Parameter l12.bias is a leaf tensor.
Parameter l13.weight is a leaf tensor.
Parameter l13.bias is a leaf tensor.
Parameter l14.weight is a leaf tensor.
Parameter l14.bias is a leaf tensor.


In [619]:
# One manual actor update on a single random state, used to inspect gradients.
random_state = torch.rand(1, state_size).to(device)

# Action proposed by the current policy for that state.
action = actor(random_state)

# Actor objective: maximise the first Q head's value, i.e. minimise its negative mean.
loss_actor = -critic.Q1_forward(random_state, action).mean()

print(f'loss_actor: {loss_actor}')

# actor_optimizer was built from actor.parameters(), so only actor grads are cleared here.
# NOTE(review): the loss flows through critic.Q1_forward, so backward presumably also
# accumulates gradients on the critic's parameters; nothing in this cell clears them — confirm.
actor_optimizer.zero_grad()
loss_actor.backward()
# Dump the raw gradient of every actor parameter (can be a lot of output).
for p in actor.parameters():
    print(p.grad)

# L2 norm over all actor gradients flattened into one vector.
print(f'Grad norm of actor: {torch.norm(torch.cat([p.grad.flatten() for p in actor.parameters()]))}')

# Apply the AdamW update to the actor.
actor_optimizer.step()


loss_actor: 0.09932661056518555
tensor([[0.0049, 0.0037, 0.0051,  ..., 0.0029, 0.0026, 0.0048],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0106, 0.0081, 0.0111,  ..., 0.0063, 0.0056, 0.0105],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],
       device='cuda:0')
tensor([ 0.0056,  0.0000,  0.0123,  0.0072,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0004,  0.0000,  0.0000, -0.0074,  0.0000,  0.0039,  0.0000,
        -0.0092, -0.0121,  0.0000,  0.0000,  0.0000,  0.0000, -0.0025,  0.0000,
        -0.0119,  0.0054,  0.0045, -0.0068,  0.0000,  0.0000,  0.0000,  0.0000,
        -0.0133,  0.0000,  0.0050,  0.0000, -0.0076,  0.0000,  0.0040,  0.0000,
         0.0000,  0.0072,  0.0000,  0.0000,  0.0031,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -0.0069,  0.0