# 自作のActor-Criticノートブック

In [None]:
import numpy as np
import copy
from dataclasses import dataclass, asdict, is_dataclass

import sys
import logging

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Normal

import gymnasium as gym

from myActivator import tanhAndScale
from myFunction import make_squashed_gaussian

In [2]:
# Send INFO-and-above log records to stdout, prefixed with an HH:MM:SS
# timestamp and the level name.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S",
)

In [3]:
# Create the Pendulum environment with on-screen rendering.
env = gym.make("Pendulum-v1", render_mode="human")

# Log every attribute of the environment spec first, then every attribute of
# the unwrapped environment, one "name: value" record per attribute.
for source in (env.spec, env.unwrapped):
    for key, value in vars(source).items():
        logging.info('%s: %s', key, value)

00:31:04 [INFO] id: Pendulum-v1
00:31:04 [INFO] entry_point: gymnasium.envs.classic_control.pendulum:PendulumEnv
00:31:04 [INFO] reward_threshold: None
00:31:04 [INFO] nondeterministic: False
00:31:04 [INFO] max_episode_steps: 200
00:31:04 [INFO] order_enforce: True
00:31:04 [INFO] disable_env_checker: False
00:31:04 [INFO] kwargs: {'render_mode': 'human'}
00:31:04 [INFO] additional_wrappers: ()
00:31:04 [INFO] vector_entry_point: None
00:31:04 [INFO] namespace: None
00:31:04 [INFO] name: Pendulum
00:31:04 [INFO] version: 1
00:31:04 [INFO] max_speed: 8
00:31:04 [INFO] max_torque: 2.0
00:31:04 [INFO] dt: 0.05
00:31:04 [INFO] g: 10.0
00:31:04 [INFO] m: 1.0
00:31:04 [INFO] l: 1.0
00:31:04 [INFO] render_mode: human
00:31:04 [INFO] screen_dim: 500
00:31:04 [INFO] screen: None
00:31:04 [INFO] clock: None
00:31:04 [INFO] isopen: True
00:31:04 [INFO] action_space: Box(-2.0, 2.0, (1,), float32)
00:31:04 [INFO] observation_space: Box([-1. -1. -8.], [1. 1. 8.], (3,), float32)
00:31:04 [INFO] spec

In [4]:
@dataclass
class Config:
    """Hyperparameters for the value (V) and policy (P) networks.

    NOTE(review): none of these attributes has a type annotation, so
    ``@dataclass`` registers no fields here. They are plain class attributes:
    shared by every instance, not accepted by ``Config(...)``, and not
    reported by ``asdict``. Confirm that this is intended.
    """

    # Hidden-layer widths. Each network gets its own list object.
    V_net_sizes = [6, 12, 12, 6]
    P_net_sizes = [6, 12, 12, 6]

    # Input and output widths of the two networks.
    V_net_in = P_net_in = 3
    V_net_out = P_net_out = 1

    # Learning rates handed to the optimizers.
    V_lr = P_lr = 1e-3

    # Upper / lower bound values (presumably the action range of the
    # environment -- verify against the caller).
    u_high = 2.0
    u_low = -2.0

In [None]:
class ActorCriticAgent:
    def __init__(self,Config,device=None):
        if Config:
            self.Config = Config
        else:
            raise Exception("No Config!!")
        
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available else "cpu")
        else:
            self.device = torch.device(device)

        self.u_high = torch.as_tensor(Config.u_high, dtype=torch.float32, device=self.device)
        self.u_low = torch.as_tensor(Config.u_low, dtype=torch.float32, device=self.device)
        
        self.V_net = self.build_net(
            Config.V_net_in,
            Config.V_net_sizes,
            Config.V_net_out
        ).to(self.device)
        self.V_net.train()

        self.P_net = self.build_net(
            Config.P_net_in,
            Config.P_net_sizes,
            Config.P_net_out
        ).to(self.device)
        self.P_net.train()

        self.V_optim = optim.Adam(self.V_net.parameters(),Config.V_lr)
        self.P_optim = optim.Adam(self.P_net.parameters(),Config.P_lr)

    
    def to(self,device):
        self.device = torch.device(device)
        self.V_net.to(self.device)
        self.P_net.to(self.device)
        return self


    def build_net(self,input_size,hidden_sizes,output_size=1,output_activator=None):
        layers = []
        for input_size, output_size in zip([input_size]+hidden_sizes, hidden_sizes+[output_size]):
            layers.append(nn.Linear(input_size,output_size))
            layers.append(nn.ReLU())
        layers = layers[:-1]
        if output_activator:
            layers.append(output_activator)
        net = nn.Sequential(*layers)
        return net