In [2]:
import tensorflow as tf
import numpy as np

from numpy.random import RandomState

In [3]:
class Actor(tf.keras.Model):
    '''
    Actor network: generates an action vector from a state representation.

    Architecture: two ReLU dense layers of width `in_features` followed by
    a tanh dense layer of width `out_features`, so each action component is
    bounded by tanh.

    in_features : the size of the state representation (obtained from DRR-Ave)
    out_features : the size of the action space
    '''
    def __init__(self, in_features=100, out_features=18):
        super(Actor, self).__init__()
        self.in_features = in_features
        self.out_features = out_features

        def make_dense(units, activation):
            # Every layer uses the same recipe: orthogonal kernel, zero bias,
            # L2 weight penalty. A fresh initializer/regularizer object is
            # created per layer.
            # NOTE(review): all layers use seed=42, so layers whose kernels
            # have the same shape may start from identical weights depending
            # on the TF version - confirm this is intended.
            return tf.keras.layers.Dense(
                units=units,
                activation=activation,
                kernel_initializer=tf.keras.initializers.Orthogonal(seed=42),
                bias_initializer='zeros',
                kernel_regularizer=tf.keras.regularizers.L2(0.1))

        self.linear_1 = make_dense(in_features, 'relu')
        self.linear_2 = make_dense(in_features, 'relu')
        self.linear_3 = make_dense(out_features, 'tanh')

    def call(self, state):
        '''
        Compute the action(s) for the given state(s).

        state : tensor-like whose element count is a multiple of
                `in_features`; a single state or a batch of states.
        returns : tensor of shape (batch, out_features).
        '''
        # -1 lets TensorFlow infer the batch dimension. A single state still
        # becomes (1, in_features), but batches are no longer rejected as they
        # were with the previously hard-coded leading dimension of 1.
        inputs = tf.reshape(state, (-1, self.in_features))
        hidden = self.linear_1(inputs)
        hidden = self.linear_2(hidden)
        return self.linear_3(hidden)

In [4]:
class Critic(tf.keras.Model):
    '''
    Critic network (Deep-Q-Network) scoring a state/action pair.

    The state is first passed through a ReLU dense layer of width
    `in_features`; the result is concatenated with the action and passed
    through two ReLU dense layers of width `in_features + action_size`,
    then a linear output layer of width `out_features`.

    action_size : the size of the action space produced by the actor network
    in_features : the size of the state representation (obtained from DRR-Ave)
    out_features : width of the output (described as the Q-Value)

    NOTE(review): the default out_features=18 produces an 18-wide output
    rather than a scalar Q-value, and the default action_size=20 differs from
    the Actor default out_features=18 - confirm against the calling code.
    '''
    def __init__(self, action_size=20, in_features=128, out_features=18):
        super(Critic, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.combo_features = in_features + action_size
        self.action_size = action_size

        def make_dense(units, activation):
            # Shared layer recipe: orthogonal kernel, zero bias, L2 penalty.
            # A fresh initializer/regularizer object is created per layer.
            # NOTE(review): all layers use seed=42, so layers whose kernels
            # have the same shape may start from identical weights depending
            # on the TF version - confirm this is intended.
            return tf.keras.layers.Dense(
                units,
                activation=activation,
                kernel_initializer=tf.keras.initializers.Orthogonal(seed=42),
                bias_initializer='zeros',
                kernel_regularizer=tf.keras.regularizers.L2(0.1))

        self.linear_1 = make_dense(self.in_features, 'relu')
        self.linear_2 = make_dense(self.combo_features, 'relu')
        self.linear_3 = make_dense(self.combo_features, 'relu')
        # No activation on the output layer: the value is unbounded.
        self.linear_4 = make_dense(out_features, None)

    def call(self, state, action):
        '''
        Compute the critic output for the given state(s) and action(s).

        state : tensor-like whose element count is a multiple of
                `in_features`; a single state or a batch of states.
        action : rank-2 tensor whose leading dimension matches the state
                 batch size (it is concatenated along axis 1).
        returns : tensor of shape (batch, out_features).
        '''
        # -1 lets TensorFlow infer the batch dimension. A single state still
        # becomes (1, in_features), but batches are no longer rejected as they
        # were with the previously hard-coded leading dimension of 1.
        inputs = tf.reshape(state, (-1, self.in_features))
        state_features = self.linear_1(inputs)
        # Action goes first in the concatenation, followed by the encoded state.
        joint = tf.concat([action, state_features], 1)
        hidden = self.linear_2(joint)
        hidden = self.linear_3(hidden)
        return self.linear_4(hidden)