In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import gymnasium as gym
from reinforcement_learning_course.deep_rl.actor_critic.examples import agents

In [2]:
# Create the LunarLander-v3 environment with continuous=False, wind disabled,
# and both wind and turbulence power set to zero.
env = gym.make(
    "LunarLander-v3",
    continuous=False,
    gravity=-10.0,
    enable_wind=False,
    wind_power=0.0,
    turbulence_power=0.0,
)
# Build the course's actor-critic agent on top of that environment.
agent = agents.LunarLanderActorCritic(env)

  from pkg_resources import resource_stream, resource_exists


In [3]:
# Print every parameter of the agent's value network, one after another.
# Note: the loop variable `param` stays bound in the notebook namespace afterwards.
for param in agent.value_network.parameters():
    print(param)

Parameter containing:
tensor([[-0.0208, -0.0705, -0.3461,  ..., -0.0081, -0.3468, -0.1949],
        [-0.1800,  0.2669,  0.1591,  ..., -0.2763,  0.0929,  0.2203],
        [-0.1739,  0.0287,  0.1122,  ..., -0.1830, -0.3334,  0.3042],
        ...,
        [ 0.1034, -0.3263,  0.0624,  ..., -0.3475, -0.0412,  0.0594],
        [-0.1733, -0.2827, -0.1273,  ...,  0.3119, -0.2241, -0.3158],
        [ 0.1647,  0.0972,  0.2821,  ...,  0.0223,  0.1633, -0.2562]],
       requires_grad=True)
Parameter containing:
tensor([-0.2635, -0.3217,  0.1286,  0.2566, -0.2247,  0.0921,  0.0903,  0.3217,
        -0.0143,  0.1321, -0.2566,  0.1636,  0.0680, -0.2148, -0.2292,  0.2790,
        -0.0509,  0.1536, -0.3076,  0.0992, -0.2599, -0.2468, -0.0326, -0.3386,
        -0.2168, -0.0476, -0.1050, -0.3159, -0.0956, -0.2662,  0.3079, -0.2334,
        -0.3273,  0.1329, -0.3190, -0.0424, -0.2265, -0.3313, -0.0980, -0.3293,
         0.1833,  0.0840,  0.2796, -0.1176,  0.1560, -0.0797,  0.2140, -0.1621,
        -0.0280

In [4]:
# Convert each value-network parameter to a NumPy array. detach() drops the
# autograd tracking first so that .numpy() can be called on the result.
l = [
    weights.detach().numpy()
    for weights in agent.value_network.parameters()
]

In [5]:
# Take the first parameter yielded by the value network and bind it to `a`.
a = next(iter(agent.value_network.parameters()))

In [6]:
# Display the parameter currently bound to `a`.
a

Parameter containing:
tensor([[-0.0208, -0.0705, -0.3461,  ..., -0.0081, -0.3468, -0.1949],
        [-0.1800,  0.2669,  0.1591,  ..., -0.2763,  0.0929,  0.2203],
        [-0.1739,  0.0287,  0.1122,  ..., -0.1830, -0.3334,  0.3042],
        ...,
        [ 0.1034, -0.3263,  0.0624,  ..., -0.3475, -0.0412,  0.0594],
        [-0.1733, -0.2827, -0.1273,  ...,  0.3119, -0.2241, -0.3158],
        [ 0.1647,  0.0972,  0.2821,  ...,  0.0223,  0.1633, -0.2562]],
       requires_grad=True)

In [7]:
# Check the Python type of the object returned by parameters().
type(a)

torch.nn.parameter.Parameter

In [8]:
# Inspect the value network's state dict (an ordered mapping of parameter
# names to tensors, as the recorded output shows).
agent.value_network.state_dict()

OrderedDict([('mlp.linear1.weight',
              tensor([[-0.0208, -0.0705, -0.3461,  ..., -0.0081, -0.3468, -0.1949],
                      [-0.1800,  0.2669,  0.1591,  ..., -0.2763,  0.0929,  0.2203],
                      [-0.1739,  0.0287,  0.1122,  ..., -0.1830, -0.3334,  0.3042],
                      ...,
                      [ 0.1034, -0.3263,  0.0624,  ..., -0.3475, -0.0412,  0.0594],
                      [-0.1733, -0.2827, -0.1273,  ...,  0.3119, -0.2241, -0.3158],
                      [ 0.1647,  0.0972,  0.2821,  ...,  0.0223,  0.1633, -0.2562]])),
             ('mlp.linear1.bias',
              tensor([-0.2635, -0.3217,  0.1286,  0.2566, -0.2247,  0.0921,  0.0903,  0.3217,
                      -0.0143,  0.1321, -0.2566,  0.1636,  0.0680, -0.2148, -0.2292,  0.2790,
                      -0.0509,  0.1536, -0.3076,  0.0992, -0.2599, -0.2468, -0.0326, -0.3386,
                      -0.2168, -0.0476, -0.1050, -0.3159, -0.0956, -0.2662,  0.3079, -0.2334,
                    

In [9]:
# Look up a single tensor in the state dict by its key.
agent.value_network.state_dict()['mlp.linear1.weight']

tensor([[-0.0208, -0.0705, -0.3461,  ..., -0.0081, -0.3468, -0.1949],
        [-0.1800,  0.2669,  0.1591,  ..., -0.2763,  0.0929,  0.2203],
        [-0.1739,  0.0287,  0.1122,  ..., -0.1830, -0.3334,  0.3042],
        ...,
        [ 0.1034, -0.3263,  0.0624,  ..., -0.3475, -0.0412,  0.0594],
        [-0.1733, -0.2827, -0.1273,  ...,  0.3119, -0.2241, -0.3158],
        [ 0.1647,  0.0972,  0.2821,  ...,  0.0223,  0.1633, -0.2562]])

In [10]:
# Zero-filled NumPy array modelled on the 'mlp.linear1.weight' tensor
# (the recorded repr of `a` shows dtype float32).
a = np.zeros_like(agent.value_network.state_dict()['mlp.linear1.weight'])
# Rebind the first parameter's .data to a tensor created from `a`.
# torch.from_numpy shares memory with the source array, so later in-place
# changes to the parameter's data are visible through `a` as well.
list(agent.value_network.parameters())[0].data = torch.from_numpy(a)

In [11]:
# Display the NumPy array bound to `a` (this name previously held a Parameter).
a

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [15]:
# NOTE(review): this assignment does not modify the parameter. Tensor.tolist()
# builds a fresh nested Python list, and only that temporary list is changed
# (the output recorded for the following cell is still all zeros). To write
# into the parameter itself, index the tensor under torch.no_grad() instead,
# e.g. `p[0, 0] = 1`.
list(agent.value_network.parameters())[0].tolist()[0][0] = 1

In [16]:
# Re-display the first parameter to see whether the preceding assignment took effect.
list(agent.value_network.parameters())[0]

Parameter containing:
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], requires_grad=True)

In [None]:
# Display `a` again.
# NOTE(review): this cell has no execution count, so its recorded output may
# not come from the same run as the numbered cells — re-run to confirm.
a

array([[1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.]], dtype=float32)

In [None]:
# Display the first value-network parameter for comparison with `a`.
list(agent.value_network.parameters())[0]

Parameter containing:
tensor([[1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        ...,
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.]], requires_grad=True)

In [None]:
# Add 1 in place to the first parameter's underlying data. The outputs recorded
# around this cell show both the parameter and `a` going from 1s to 2s, which
# is consistent with the two still sharing memory.
list(agent.value_network.parameters())[0].data += 1

In [None]:
# Display the first parameter after the in-place increment.
list(agent.value_network.parameters())[0]

Parameter containing:
tensor([[2., 2., 2.,  ..., 2., 2., 2.],
        [2., 2., 2.,  ..., 2., 2., 2.],
        [2., 2., 2.,  ..., 2., 2., 2.],
        ...,
        [2., 2., 2.,  ..., 2., 2., 2.],
        [2., 2., 2.,  ..., 2., 2., 2.],
        [2., 2., 2.,  ..., 2., 2., 2.]], requires_grad=True)

In [None]:
# Display `a` to check whether it followed the parameter's in-place update.
a

array([[2., 2., 2., ..., 2., 2., 2.],
       [2., 2., 2., ..., 2., 2., 2.],
       [2., 2., 2., ..., 2., 2., 2.],
       ...,
       [2., 2., 2., ..., 2., 2., 2.],
       [2., 2., 2., ..., 2., 2., 2.],
       [2., 2., 2., ..., 2., 2., 2.]], dtype=float32)

In [34]:
# Convert every value-network parameter to NumPy inside a no_grad block;
# the recorded output shows .numpy() succeeding here without a prior .detach().
with torch.no_grad():
    print([
        weights.numpy()
        for weights in agent.value_network.parameters()
    ])

[array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([-0.2635335 , -0.32171178,  0.12857664,  0.2566239 , -0.22473407,
        0.09210953,  0.09032655,  0.32171813, -0.01425979,  0.13212764,
       -0.2565661 ,  0.16356274,  0.06801376, -0.21481894, -0.22923973,
        0.27904442, -0.05091342,  0.1536    , -0.30758977,  0.09918398,
       -0.2599137 , -0.24677292, -0.03262022, -0.3386065 , -0.21683146,
       -0.04760107, -0.10502948, -0.31594288, -0.09556818, -0.26624507,
        0.30792788, -0.2334484 , -0.327278  ,  0.13286072, -0.3190061 ,
       -0.04244331, -0.22646931, -0.33130208, -0.09797183, -0.32927486,
        0.18334231,  0.08397025,  0.27956048, -0.11763367,  0.15604678,
       -0.07972711,  0.21403262, -0.1621007 , -0.02799082,  0.2863857 ,
       -0.03205752, -0

In [21]:
# Bind the first value-network parameter to `param` and display it.
param = next(iter(agent.value_network.parameters()))
param

Parameter containing:
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], requires_grad=True)

In [22]:
# Convert the parameter to a nested Python list. This rebinds `l`, which an
# earlier cell used for a list of NumPy arrays.
l = param.tolist()

In [23]:
# Display the nested list (the recorded output shows rows of 8 floats).
l

[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0,

In [24]:
# Build a new NumPy array from the nested Python list and display it.
# The recorded repr no longer shows dtype=float32, unlike the earlier `a`.
a = np.array(l)
a

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [25]:
# Replace the first row of the Python list with the scalar 1.
l[0] = 1

In [26]:
# Display `a` after mutating `l`: the recorded output is still all zeros,
# i.e. the array built by np.array(l) did not follow the change to the list.
a

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])