In [1]:
import datetime
import os
import argparse
import torch

from rlpyt.samplers.collections import TrajInfo
from rlpyt.runners.minibatch_rl import MinibatchRlEval, MinibatchRl
from rlpyt.samplers.serial.sampler import SerialSampler
from rlpyt.utils.logging.context import logger_context

from dreamer_agent import DMCDreamerAgent
from algorithm import Dreamer
from envs.dmc import DeepMindControl
from envs.time_limit import TimeLimit
from envs.action_repeat import ActionRepeat
from envs.normalize_actions import NormalizeActions
from envs.wrapper import make_wapper

2023-12-30 18:34:33.071452: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def build_and_train(
    log_dir,
    game="cartpole_balance",
    run_ID=0,
    cuda_idx=None,
    eval=False,
    save_model="last",
    load_model_path=None,
):
    """Assemble the sampler, Dreamer algorithm, agent and runner, then train.

    Args:
        log_dir: Directory handed to ``logger_context`` for logs/snapshots.
        game: Task name; forwarded as ``name`` to the training and evaluation
            environments and appended to ``"dreamer_"`` to form the run name.
        run_ID: Run identifier handed to ``logger_context``.
        cuda_idx: Value placed in the runner's ``affinity`` dict under
            ``cuda_idx`` (``None`` by default).
        eval: When truthy the ``MinibatchRlEval`` runner is used, otherwise
            ``MinibatchRl``. (The name shadows the builtin but is kept for
            backward compatibility with keyword callers.)
        save_model: Forwarded to ``logger_context`` as ``snapshot_mode``.
        load_model_path: Optional path to a checkpoint readable by
            ``torch.load``. Any falsy value means "start from scratch". The
            keys ``"agent_state_dict"`` and ``"optimizer_state_dict"`` are read
            from the loaded mapping; missing keys yield ``None``.
    """
    # NOTE(review): torch.load unpickles the file, so only trusted checkpoints
    # should be passed here.
    # map_location="cpu" lets a snapshot written by a GPU run be restored on a
    # machine without that GPU (or without CUDA at all). The state dicts are
    # presumably copied onto the target device by the downstream
    # load_state_dict calls -- TODO confirm against the agent/algorithm code.
    params = torch.load(load_model_path, map_location="cpu") if load_model_path else {}
    agent_state_dict = params.get("agent_state_dict")
    optimizer_state_dict = params.get("optimizer_state_dict")

    # Each agent action is repeated this many times by the ActionRepeat
    # wrapper; the TimeLimit duration is scaled down by the same factor.
    action_repeat = 2
    factory_method = make_wapper(
        DeepMindControl,
        [ActionRepeat, NormalizeActions, TimeLimit],
        [dict(amount=action_repeat), dict(), dict(duration=1000 / action_repeat)],
    )
    sampler = SerialSampler(
        EnvCls=factory_method,
        TrajInfoCls=TrajInfo,
        env_kwargs=dict(name=game),
        eval_env_kwargs=dict(name=game),
        batch_T=1,
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = Dreamer(initial_optim_state_dict=optimizer_state_dict)  # Run with defaults.
    agent = DMCDreamerAgent(
        train_noise=0.3,
        eval_noise=0,
        expl_type="additive_gaussian",
        expl_min=None,
        expl_decay=None,
        initial_model_state_dict=agent_state_dict,
    )
    runner_cls = MinibatchRlEval if eval else MinibatchRl
    runner = runner_cls(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=5000,
        log_interval_steps=1e3,
        affinity=dict(cuda_idx=cuda_idx),
    )
    config = dict(game=game)
    name = "dreamer_" + game
    with logger_context(
        log_dir,
        run_ID,
        name,
        config,
        snapshot_mode=save_model,
        override_prefix=True,
        use_summary_writer=True,
    ):
        runner.train()


In [3]:
# Log directory: can be overridden through the DREAMER_LOG_DIR environment
# variable so the notebook is runnable on other machines; the fallback is the
# location this notebook originally used.
log_dir = os.path.abspath(
    os.environ.get("DREAMER_LOG_DIR", '/home/eddy/Projects/RL_project/logs')
)

build_and_train(
    log_dir,
    game="humanoid_stand",
    run_ID=0,
    cuda_idx=0,
    eval=False,
    save_model="last",
    # None (the function's own default) is the "no checkpoint" sentinel;
    # the previous False only worked because it is falsy.
    load_model_path=None,
)

2023-12-30 18:34:34.640625  | dreamer_humanoid_stand_0 Runner  master CPU affinity: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].
2023-12-30 18:34:34.641046  | dreamer_humanoid_stand_0 Runner  master Torch threads: 6.


[32musing seed 1564[0m
2023-12-30 18:34:36.069779  | dreamer_humanoid_stand_0 Sampler decorrelating envs, max steps: 0
2023-12-30 18:34:36.070504  | dreamer_humanoid_stand_0 Serial Sampler initialized.
2023-12-30 18:34:36.070996  | dreamer_humanoid_stand_0 Running 50000 iterations of minibatch RL.
2023-12-30 18:34:36.848658  | dreamer_humanoid_stand_0 Initialized agent model on device: cuda:0.
DAI CHE SIAMO VICINIIIIIIII
size passato a SequenceNStepReturnBuffer = 50000
2023-12-30 18:34:36.868767  | dreamer_humanoid_stand_0 Optimizing over 1000 iterations.


  super().__init__(params, defaults)


2023-12-30 18:34:45.506017  | dreamer_humanoid_stand_0 itr #999 saving snapshot...
2023-12-30 18:34:45.553742  | dreamer_humanoid_stand_0 itr #999 saved
2023-12-30 18:34:45.559984  | -----------------------------  ----------
2023-12-30 18:34:45.560600  | Diagnostics/NewCompletedTrajs     2
2023-12-30 18:34:45.561090  | Diagnostics/StepsInTrajWindow  1000
2023-12-30 18:34:45.561706  | Diagnostics/Iteration           999
2023-12-30 18:34:45.562314  | Diagnostics/CumTime (s)           8.68515
2023-12-30 18:34:45.562752  | Diagnostics/CumSteps           1000
2023-12-30 18:34:45.563172  | Diagnostics/CumCompletedTrajs     2
2023-12-30 18:34:45.563788  | Diagnostics/CumUpdates            0
2023-12-30 18:34:45.564300  | Diagnostics/StepsPerSecond      115.139
2023-12-30 18:34:45.564665  | Diagnostics/UpdatesPerSecond      0
2023-12-30 18:34:45.565152  | Diagnostics/ReplayRatio           0
2023-12-30 18:34:45.565600  | Diagnostics/CumReplayRatio        0
2023-12-30 18:34:45.565911  | Length/Av

Imagination:   0%|          | 0/100 [00:00<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (2500x230 and 1024x1024)