In [1]:
import datetime
import os
import argparse
import torch

from rlpyt.samplers.collections import TrajInfo
from rlpyt.runners.minibatch_rl import MinibatchRlEval, MinibatchRl
from rlpyt.samplers.serial.sampler import SerialSampler
from rlpyt.utils.logging.context import logger_context

from dreamer_agent import DMCDreamerAgent
from algorithm import Dreamer
from envs.dmc import DeepMindControl
from envs.time_limit import TimeLimit
from envs.action_repeat import ActionRepeat
from envs.normalize_actions import NormalizeActions
from envs.wrapper import make_wapper

2023-12-29 11:28:12.762227: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def build_and_train(
    log_dir,
    game="cartpole_balance",
    run_ID=0,
    cuda_idx=None,
    eval=False,
    save_model="last",
    load_model_path=None,
):
    """Assemble a Dreamer run on a DeepMind Control task and start training.

    Args:
        log_dir: Directory handed to ``logger_context`` for this run.
        game: Task name, forwarded as ``name`` to both the training and the
            evaluation environment.
        run_ID: Run identifier handed to ``logger_context``.
        cuda_idx: Stored under ``cuda_idx`` in the runner's ``affinity`` dict.
            NOTE(review): ``None`` presumably means CPU -- confirm in rlpyt.
        eval: Truthy selects ``MinibatchRlEval``, falsy selects
            ``MinibatchRl``. The name shadows the builtin inside this
            function; it is kept because callers pass it by keyword.
        save_model: Forwarded to ``logger_context`` as ``snapshot_mode``.
        load_model_path: Optional checkpoint path. Any falsy value (``None``,
            ``False``, ``""``) starts from scratch.

    Returns:
        None. The function finishes when ``runner.train()`` returns.
    """
    if load_model_path:
        # Deserialize onto CPU so a checkpoint written on a GPU can still be
        # restored on a machine without that device (e.g. cuda_idx=None).
        # NOTE(review): relies on the downstream load_state_dict calls copying
        # the values onto the model/optimizer device -- confirm for Dreamer.
        params = torch.load(load_model_path, map_location="cpu")
    else:
        params = {}
    # Missing keys yield None, which is passed straight through below.
    agent_state_dict = params.get("agent_state_dict")
    optimizer_state_dict = params.get("optimizer_state_dict")

    action_repeat = 2
    # One kwargs dict per wrapper, in the same order as the wrapper list.
    # True division: duration is the float 500.0 for action_repeat == 2.
    factory_method = make_wapper(
        DeepMindControl,
        [ActionRepeat, NormalizeActions, TimeLimit],
        [dict(amount=action_repeat), dict(), dict(duration=1000 / action_repeat)],
    )
    sampler = SerialSampler(
        EnvCls=factory_method,
        TrajInfoCls=TrajInfo,
        env_kwargs=dict(name=game),
        eval_env_kwargs=dict(name=game),
        batch_T=1,
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = Dreamer(initial_optim_state_dict=optimizer_state_dict)  # Run with defaults.
    agent = DMCDreamerAgent(
        train_noise=0.3,
        eval_noise=0,
        expl_type="additive_gaussian",
        expl_min=None,
        expl_decay=None,
        initial_model_state_dict=agent_state_dict,
    )
    runner_cls = MinibatchRlEval if eval else MinibatchRl
    runner = runner_cls(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=5e6,
        log_interval_steps=1e3,
        affinity=dict(cuda_idx=cuda_idx),
    )
    config = dict(game=game)
    name = "dreamer_" + game
    # Training runs inside the logging context configured for this run.
    with logger_context(
        log_dir,
        run_ID,
        name,
        config,
        snapshot_mode=save_model,
        override_prefix=True,
        use_summary_writer=True,
    ):
        runner.train()


In [3]:
# Directory that receives this run's logs (normalised absolute path).
log_dir = os.path.abspath('/home/eddy/Projects/RL_project/logs')

# Keyword arguments for the run. load_model_path is falsy, so
# build_and_train does not read a checkpoint and starts from scratch.
run_options = dict(
    game="humanoid_stand",
    run_ID=0,
    cuda_idx=0,
    eval=False,
    save_model="last",
    load_model_path=False,
)

build_and_train(log_dir, **run_options)

2023-12-29 11:28:16.365711  | dreamer_humanoid_stand_0 Runner  master CPU affinity: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].
2023-12-29 11:28:16.366610  | dreamer_humanoid_stand_0 Runner  master Torch threads: 6.


[32musing seed 3988[0m
 SO LILLOOOOOOOOOOOOOOOOOOOOOO
tensor([[[ 29,  29,  29,  ...,  29,  29,  29],
         [ 29,  29,  29,  ...,  29,  29,  29],
         [ 29,  29,  29,  ...,  29,  29,  29],
         ...,
         [ 54,  55,  55,  ...,  27,  27,  27],
         [ 54,  55,  55,  ...,  27,  27,  27],
         [ 54,  55,  55,  ...,  27,  27,  27]],

        [[ 48,  48,  48,  ...,  48,  48,  48],
         [ 48,  48,  48,  ...,  48,  48,  48],
         [ 48,  48,  48,  ...,  48,  48,  48],
         ...,
         [ 80,  81,  82,  ...,  56,  56,  55],
         [ 80,  81,  82,  ...,  56,  56,  55],
         [ 81,  82,  82,  ...,  56,  56,  55]],

        [[ 67,  67,  67,  ...,  67,  67,  67],
         [ 67,  67,  67,  ...,  67,  67,  67],
         [ 67,  67,  67,  ...,  67,  67,  67],
         ...,
         [107, 108, 109,  ...,  83,  83,  82],
         [107, 108, 110,  ...,  83,  83,  82],
         [107, 109, 110,  ...,  83,  83,  82]]], dtype=torch.uint8) tensor([ 0.7466, -0.1284,  0.18

RuntimeError: hidden0 has inconsistent hidden_size: got 30, expected 200