In [1]:
# Refer from
#  https://pasus.tistory.com/138
#  https://horomary.hatenablog.com/entry/2020/06/26/003806
#  https://keras.io/examples/rl/ddpg_pendulum/
#
import gym
import sys
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, concatenate, Lambda
from collections import deque
import matplotlib.pyplot as plt

In [3]:
class Actor(tf.keras.Model):
    def __init__(self, state_size, action_size, action_min, action_max):
        super(Actor, self).__init__()
        self.action_min = action_min
        self.action_max = action_max

        self.fc1 = Dense(64, activation='relu')
        self.fc2 = Dense(64, activation='relu')
        # self.fc3 = Dense(16, activation='relu')
        self.out= Dense(action_size, activation='tanh',kernel_initializer = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)) # -1 ~ +1

    def call(self, x):
        x       = self.fc1(x)
        x       = self.fc2(x)
        # x       = self.fc3(x)
        action  = self.out(x)
        # return self.projected_to_action_space(action)
        a = Lambda(lambda x: x*self.action_max)(action)
        return a

class Critic(tf.keras.Model):
    def __init__(self, state_size, action_size):
        super(Critic, self).__init__()
        self.s1 = Dense(16, activation='relu')
        self.s2 = Dense(32, activation='relu')
        self.a1 = Dense(32, activation='relu')
        self.a2 = Dense(32, activation='relu')
        self.fc1= Dense(64, activation='relu')
        self.fc2= Dense(64, activation='relu')
        self.out= Dense(1,  activation='linear')

    def call(self,state_action):
        state  = state_action[0]
        action = state_action[1]
        s = self.s1(state)
        s = self.s2(s)
        a = self.a1(action)
        a = self.a2(a)
        c = concatenate([s,a],axis=-1)
        x = self.fc1(c)
        x = self.fc2(x)
        q = self.out(x)
        return q

In [4]:
class DDPGAgent(tf.keras.Model):
    def __init__(self, state_size, action_size, action_min, action_max):
        super(DDPGAgent, self).__init__()
        self.state_size = state_size
        self.action_size= action_size
        self.action_max = action_max
        self.action_min = action_min

        # Hyper params for learning
        self.discount_factor = 0.99
        self.learning_rate_actor = 0.002
        self.learning_rate_critic = 0.001
        self.tau = 0.005
        
        # Neural Network Architecture
        self.actor        = Actor(self.state_size, self.action_size, self.action_min, self.action_max)
        self.actor.build(input_shape=(None, self.state_size))
        self.actor.summary()
        
    def get_action(self,state):
        # Exploration and Exploitation
        state = tf.convert_to_tensor([state], dtype=tf.float32)
        action = self.actor(state)
        out = np.clip(action.numpy()[0], -self.action_max, self.action_max)
        return out

    def load_model(self):
        self.actor.save_weights("./save_model/pendulum_ddpg_TF_actor", save_format="tf")
        return

In [5]:
# %matplotlib tk

ENV_NAME = 'Pendulum-v0'
EPISODES = 200
END_SCORE = -200

if __name__ == "__main__":
    env = gym.make(ENV_NAME)
    state_size  = env.observation_space.shape[0]
    action_size = env.action_space.shape[0]
    action_min  = env.action_space.low[0]
    action_max  = env.action_space.high[0]

    agent = DDPGAgent(state_size, action_size, action_min, action_max)
    print('Env Name : ',ENV_NAME)
    print('States {0}, Actions {1}'.format(state_size, action_size))
    print('Action space {0:.2f} ~ {1:.2f}'.format(action_min, action_max))

    end = False
    
    fig = plt.figure(1)
    fig.clf()
    
    for e in range(EPISODES):
        done = False
        score = 0
        state = env.reset()
        while not done:
            env.render()

            # Interact with env.
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            state = next_state

            # 
            score += reward
            if done:
                print("episode: {0:3d} | score : {1:3.2f} | mem size {2} |"
                    .format(e, score, len(agent.memory)))

Model: "actor"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  256       
_________________________________________________________________
dense_1 (Dense)              multiple                  4160      
_________________________________________________________________
dense_2 (Dense)              multiple                  65        
Total params: 4,481
Trainable params: 4,481
Non-trainable params: 0
_________________________________________________________________
Model: "critic"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              multiple                  64        
_________________________________________________________________
dense_7 (Dense)              multiple                  544       
___________________________________________

TypeError: object of type 'NoneType' has no len()

In [None]:
env.close()