In [2]:
import argparse
from datetime import datetime   
import json
import os
from pprint import pprint
import numpy as np
import torch
from torch.distributions import kl_divergence
from torch.nn.functional import mse_loss
from torch.nn.utils import clip_grad_norm_
from torch.optim import Adam
from torch.utils.tensorboard import SummaryWriter
from dm_control import suite
from dm_control.suite.wrappers import pixels

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
def _build_parser():
    """Build the command-line parser holding every PlaNet hyperparameter."""
    parser = argparse.ArgumentParser(description='pytorch implementation of PlaNet by LinboWang')

    # Experiment bookkeeping
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--log-dir', type=str, default='log')
    parser.add_argument('--test-interval', type=int, default=10)

    # Environment
    parser.add_argument('--domain-name', type=str, default='cheetah')
    parser.add_argument('--task-name', type=str, default='run')
    parser.add_argument('--action-repeat', type=int, default=4) # cheetah 4; cartpole 8; reacher 4; finger 2; cup 4; walker 2

    # Model
    parser.add_argument('--state-dim', type=int, default=30) # Distributions in latent space are 30-dimensional diagonal Gaussians with predicted mean and standard deviation.
    parser.add_argument('--rnn-hidden-dim', type=int, default=200) # a GRU (Cho et al., 2014) with 200 units as deterministic path in the dynamics model

    # Data collection and training
    parser.add_argument('--buffer-capacity', type=int, default=1000000)
    parser.add_argument('--all-episodes', type=int, default=200)
    parser.add_argument('--seed-episodes', type=int, default=5) # We start from S = 5 seed episodes with random actions
    parser.add_argument('--update-steps', type=int, default=100) # collect another episode every C = 100 update steps
    parser.add_argument('--batch-size', type=int, default=50) # batches of B = 50
    # The leading '--' matters: without it argparse registers a *required
    # positional* whose dest keeps the hyphens, so the default would be ignored
    # and `args.sequence_chunk_length` would not exist.
    parser.add_argument('--sequence-chunk-length', type=int, default=50) # sequence chunks of length L = 50
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--epsilon', type=float, default=1e-4) # The model is trained using the Adam optimizer (Kingma & Ba, 2014) with a learning rate of 1e-3, epsilon = 1e−4
    parser.add_argument('--clip-grad-norm', type=int, default=1000) # gradient clipping norm of 1000
    parser.add_argument('--free-nats', type=int, default=3) # grant the model 3 free nats by clipping the divergence loss below this value

    # Planning. For planning, we use:
    #   CEM with horizon length H = 12,
    #   optimization iterations I = 10,
    #   candidate samples J = 1000,
    #   and refitting to the best K = 100
    parser.add_argument('--horizon', type=int, default=12)
    parser.add_argument('--N-iterations', type=int, default=10)
    parser.add_argument('--N-candidates', type=int, default=1000)
    parser.add_argument('--N-top-candidates', type=int, default=100)
    parser.add_argument('--action-noise-var', type=float, default=0.3) # under epsilon ∼ Normal(0, 0.3) action noise
    return parser


def main(argv=None):
    """Set up a PlaNet training run: parse options, create the log dir, seed RNGs, build the env.

    The training loop itself is not implemented yet; see the TODO notes at the
    end of the body.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, exactly like the original
            zero-argument call. When calling from a notebook, pass ``[]`` (or an
            explicit list of flags) so the kernel's own command line is not
            parsed.

    Side effects:
        Creates ``<log-dir>/<domain>_<task>/<YYYYmmdd_HHMM>/``, writes
        ``args.json`` into it, opens a TensorBoard ``SummaryWriter`` on that
        directory, and seeds NumPy and PyTorch.
    """
    args = _build_parser().parse_args(argv)

    # Prepare the output directory for the training logs.
    log_dir = os.path.join(args.log_dir, args.domain_name + '_' + args.task_name)
    log_dir = os.path.join(log_dir, datetime.now().strftime('%Y%m%d_%H%M'))
    # The timestamp only has minute resolution, so a quick re-run maps to the
    # same directory; exist_ok avoids a FileExistsError in that case.
    os.makedirs(log_dir, exist_ok=True)
    with open(os.path.join(log_dir, 'args.json'), 'w') as f:
        json.dump(vars(args), f) # save the hyperparameters as JSON
    pprint(vars(args))
    writer = SummaryWriter(log_dir=log_dir) # initialise TensorBoard logging

    # Seed every random number generator in use.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    # Create the dm_control environment and wrap it so that it renders 64x64
    # pixel observations from camera 0.
    # TODO: the original note mentions a Gym-style wrapper that also performs
    # action repeat (RepeatStep); that wrapper is not applied here yet.
    env = suite.load(args.domain_name, args.task_name, task_kwargs={'random': args.seed})
    env = pixels.Wrapper(env, render_kwargs={'height': 64, 'width': 64, 'camera_id': 0})

    # TODO: seed episodes - collect experience with a random policy.

    # TODO: remaining episodes - update the model parameters for a fixed number
    # of steps and plan actions with the CEM algorithm.

        # TODO: run a test episode once every 10 update steps.

    # TODO: save the model parameters for testing and for reconstructing
    # observation videos.
    
# Build a (3, 1, 3) integer array: three 1x3 rows, each filled with its own index.
matrix = np.array([[[value] * 3] for value in range(3)])
print(matrix, matrix.shape)
# A bare full slice selects every element, so this prints the same contents again.
print(matrix[:])

[[[0 0 0]]

 [[1 1 1]]

 [[2 2 2]]] (3, 1, 3)
[[[0 0 0]]

 [[1 1 1]]

 [[2 2 2]]]


In [1]:
import torch
import torch.nn as nn

# Sizes for the toy example
input_size, hidden_size = 10, 20
batch_size, sequence_length = 5, 7

# One GRU cell; it advances the hidden state by a single time step per call
gru_cell = nn.GRUCell(input_size=input_size, hidden_size=hidden_size)

# Start from an all-zero hidden state
hx = torch.zeros((batch_size, hidden_size))

# Random input laid out as (sequence_length, batch_size, input_size)
input_data = torch.randn((sequence_length, batch_size, input_size))

# Unroll the cell over time: iterating the tensor yields one
# (batch_size, input_size) slice per step, and each new hidden state is kept.
hidden_states = []
for step_input in input_data:
    hx = gru_cell(step_input, hx)
    hidden_states.append(hx)

# Stack the per-step hidden states along a new leading time axis
outputs = torch.stack(hidden_states, dim=0)

print(outputs.shape)  # shape: (sequence_length, batch_size, hidden_size)


torch.Size([7, 5, 20])


  from .autonotebook import tqdm as notebook_tqdm
