In [1]:
import tensorflow as tf
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from numpy.random import RandomState

In [39]:
class Actor(tf.keras.Model):
    '''
    Actor network accounts for generatign action space based on
    the state space
    in_features : the size of state representation got from DRRAve
    out_features : the size of action space
    '''
    def __init__(self, in_features=100, out_features=18):
        super(Actor, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        
        self.linear_1 = tf.keras.layers.Dense(in_features, activation='relu')
        self.linear_2 = tf.keras.layers.Dense(in_features, activation='relu')
        self.linear_3 = tf.keras.layers.Dense(out_features, activation='tanh')
        
    def call(self, state):
        inputs = tf.reshape(state, (1, self.in_features))
        output = self.linear_1(inputs)
        output = self.linear_2(output)
        output = self.linear_3(output)
        return output

In [46]:
class Critic(tf.keras.Model):
    '''
    Critic networks are Deep-Q-Networks
    acton_size : is the size of action space from actor networks
    in_features : is the size of state representation got from DRR-Ave
    out_features : Q-Value
    '''
    def __init__(self, action_size=20, in_features=128, out_features=18):
        super(Critic, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.combo_features = in_features + action_size
        self.action_size = action_size
        ## check shape of the input
        self.linear_1 = tf.keras.layers.Dense(self.in_features, 
                                              activation='relu')
        
        self.linear_2 = tf.keras.layers.Dense(self.combo_features, 
                                              activation='relu')
        
        self.linear_3 = tf.keras.layers.Dense(self.combo_features, 
                                              activation='relu')
        
        self.linear_4 = tf.keras.layers.Dense(out_features, 
                                              activation=None)
        
    def call(self, state, action):
        inputs = tf.reshape(state, (1, self.in_features))
        outputs = self.linear_1(inputs)
        outputs = self.linear_2(tf.concat([action, outputs], 1))
        outputs = self.linear_3(outputs)
        outputs = self.linear_4(outputs)
        return outputs

In [40]:
## instansiasi model
actorNets = Actor(300, 18)

In [41]:
random_state = RandomState(1)

state = np.random.rand(300,)
state

array([7.71147275e-01, 7.05817944e-01, 7.35377270e-01, 9.40196060e-02,
       9.83983967e-01, 1.92378648e-01, 6.08148135e-01, 7.03158469e-01,
       4.20905221e-01, 7.33699317e-01, 4.71980048e-01, 3.35782159e-01,
       2.95776495e-01, 3.66253373e-02, 1.07180819e-01, 2.33618516e-01,
       3.54188984e-01, 9.68162850e-01, 7.46281890e-01, 3.89474696e-01,
       8.28250311e-01, 5.04679368e-01, 5.00776279e-01, 2.62748739e-01,
       3.17628430e-01, 8.36763079e-01, 5.76347713e-01, 4.01251262e-01,
       5.82598562e-01, 1.57070226e-01, 1.15862180e-02, 1.52103656e-01,
       2.37376750e-01, 9.77834857e-01, 7.51766556e-01, 8.66458685e-01,
       2.71509105e-01, 3.70663927e-01, 2.19800236e-01, 5.39546075e-01,
       2.13273917e-01, 6.88766027e-01, 3.03473675e-01, 5.99317357e-01,
       9.43061828e-01, 1.92008197e-01, 4.62551742e-01, 4.72669405e-01,
       7.32023201e-01, 8.43485181e-02, 8.15969285e-01, 1.79204213e-01,
       3.37174983e-01, 2.71392218e-01, 1.70032544e-01, 2.55261274e-01,
      

In [42]:
## output has two dimension so need to be flattened
action = actorNets(state)

In [43]:
action

<tf.Tensor: shape=(1, 18), dtype=float32, numpy=
array([[-0.444847  ,  0.29251578,  0.13992591,  0.14760038, -0.1906978 ,
         0.00556268, -0.0185544 , -0.50543755, -0.750366  , -0.340534  ,
        -0.09151803,  0.5788195 ,  0.5964281 ,  0.16290238, -0.81896913,
        -0.05147763, -0.2742237 , -0.06414839]], dtype=float32)>

In [137]:
actorNets.summary()

Model: "actor_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_48 (Dense)            multiple                  10100     
                                                                 
 dense_49 (Dense)            multiple                  10100     
                                                                 
 dense_50 (Dense)            multiple                  1818      
                                                                 
Total params: 22,018
Trainable params: 22,018
Non-trainable params: 0
_________________________________________________________________


In [47]:
criticNets = Critic(18, 300, 18)

In [48]:
criticNets(state, action)

<tf.Tensor: shape=(1, 18), dtype=float32, numpy=
array([[-0.3592132 ,  0.11059743,  0.18644165, -0.11787526, -0.65208435,
        -0.0583063 , -0.76077574, -0.17730346,  0.12711957, -0.1088125 ,
         0.11020342, -0.29217735,  0.6736198 , -0.04229749, -0.12525314,
        -0.07434888,  0.4813763 , -0.4004351 ]], dtype=float32)>