In [2]:
import os
import argparse
from datetime import datetime
import torch
from utils.SAC import SAC
from utils.env import make_env
from utils.trainer import Trainer

def run(args):
    """Train a SAC agent on ``args.env_id`` and write logs under ``logs/``.

    Args:
        args: Namespace-like object. Reads ``env_id``, ``seed``,
            ``num_steps`` and ``cuda``. ``eval_interval`` is optional and
            defaults to 10**4 steps when the attribute is absent.

    Returns:
        None. The function builds two environments with ``make_env`` (one
        for training, one for evaluation), constructs ``SAC`` and
        ``Trainer``, and calls ``trainer.train()``.
    """
    # Callers may hand in a hand-built Namespace without eval_interval;
    # fall back to a default instead of raising AttributeError.
    eval_interval = getattr(args, 'eval_interval', 10**4)

    # Only select the GPU when one is actually available; otherwise the
    # request would presumably fail once the algorithm moves tensors to it.
    use_cuda = bool(args.cuda) and torch.cuda.is_available()
    if args.cuda and not use_cuda:
        import warnings
        warnings.warn("CUDA was requested but is not available; using CPU.")
    device = torch.device("cuda" if use_cuda else "cpu")

    env = make_env(args.env_id)
    env_test = make_env(args.env_id)

    algo = SAC(
        state_shape=env.observation_space.shape,
        action_shape=env.action_space.shape,
        device=device,
        seed=args.seed
    )

    # Named `timestamp` (not `time`) so it cannot shadow the stdlib module.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M")
    log_dir = os.path.join(
        'logs', args.env_id, 'sac', f'seed{args.seed}-{timestamp}')

    trainer = Trainer(
        env=env,
        env_test=env_test,
        algo=algo,
        log_dir=log_dir,
        num_steps=args.num_steps,
        eval_interval=eval_interval,
        seed=args.seed
    )
    trainer.train()

# Settings for the SAC training run; `run` reads each of these fields.
args = argparse.Namespace(
    env_id='InvertedPendulum-v4',
    seed=0,
    num_steps=100000,
    # `run` forwards args.eval_interval to Trainer, so set it explicitly.
    eval_interval=10000,
    # Only request the GPU when one is actually present.
    cuda=torch.cuda.is_available(),
)
run(args)

In [14]:
import gymnasium as gym
# Standalone environment used by the interactive API checks in later cells.
env = gym.make('InvertedPendulum-v4')

In [18]:
# reset() returns an (observation, info) pair, as the output below shows.
env.reset()


(array([ 0.00769388,  0.00333762,  0.00876472, -0.0037946 ]), {})

In [22]:
# Unpack the reset result, keeping the observation and discarding the info dict.
state, _ = env.reset()
print(state)

[-0.00748317  0.00526792 -0.0001853  -0.00792097]


In [8]:
# Throwaway environment: create it, show one reset, and always release it.
test_env = gym.make('InvertedPendulum-v4')
try:
    print(test_env.reset())
finally:
    # close() runs even if reset() raises, so the environment is not leaked.
    test_env.close()

(array([ 0.0068468 ,  0.00772261, -0.00960236,  0.00707424]), {})


In [20]:
# Take one random action; the result is a 5-tuple, which in the Gymnasium API
# is (observation, reward, terminated, truncated, info).
env.step(env.action_space.sample())

(array([ 0.01434256, -0.01126657,  0.32283625, -0.71925803]),
 1.0,
 False,
 False,
 {})

In [26]:
# Inspect the observation and action spaces (bounds, shape, dtype).
env.observation_space, env.action_space

(Box(-inf, inf, (4,), float64), Box(-3.0, 3.0, (1,), float32))

In [13]:
# NOTE(review): the AttributeError recorded for this cell
# ("'super' object has no attribute '_SAC__init'") is the name-mangled form of
# `super().__init`, so it most likely comes from a `super().__init(...)` typo
# (missing trailing underscores) inside the SAC class, not from this cell.
args = argparse.Namespace(
    env_id='InvertedPendulum-v4',
    seed=0,
    num_steps=100000,
    # `run` forwards args.eval_interval to Trainer, so set it explicitly.
    eval_interval=10000,
    # Only request the GPU when one is actually present.
    cuda=torch.cuda.is_available(),
)
run(args)

AttributeError: 'super' object has no attribute '_SAC__init'

In [11]:
# import gymnasium as gym
# env = gym.make("InvertedPendulum-v2", render_mode="human")
# observation, info = env.reset(seed = 42)
# for _ in range(1000):
#     action = env.action_space.sample()
#     observation, reward, terminated, truncated, info = env.step(action)
#     if terminated or truncated:
#         observation, info = env.reset()
# env.close()

In [27]:
import torch


In [29]:
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code. Only load checkpoints from a trusted source, or pass weights_only=True
# on PyTorch versions that support it.
# map_location='cpu' lets the checkpoint load on machines that lack the device
# it was saved from.
model = torch.load('Final/disc.pth', map_location='cpu')

In [30]:
# Display the loaded object; the output below shows an OrderedDict of tensors.
model

OrderedDict([('g.0.weight',
              tensor([[ 4.3666e-01,  2.1526e-01,  2.7205e-01, -3.6061e-01],
                      [ 9.5484e-02, -4.7686e-02, -2.3420e-01,  1.0519e-01],
                      [ 1.9766e-01, -3.4511e-01, -6.2845e-02,  1.2974e-01],
                      [ 4.8908e-01, -4.0818e-01,  4.4691e-01, -5.4010e-02],
                      [ 1.1730e-01, -4.2562e-01, -4.3420e-01, -2.5175e-01],
                      [-3.1360e-02,  9.0418e-02,  4.5270e-01, -8.8364e-02],
                      [ 3.5031e-01, -4.2547e-01,  2.8572e-01, -4.3811e-01],
                      [-4.8079e-01,  2.9433e-01, -3.3250e-01, -2.7929e-01],
                      [-3.0064e-01, -1.4040e-01,  2.3896e-01,  2.5422e-01],
                      [-3.7040e-01,  4.4047e-02, -4.0750e-01, -4.2675e-01],
                      [-1.3231e-01, -4.7912e-02,  2.4451e-01,  2.1933e-01],
                      [ 1.1892e-01,  3.7667e-01, -6.2998e-02, -4.1033e-02],
                      [ 4.8950e-01,  3.0957e-02,  3.4657e-01