In [None]:
import tensorflow as tf
import numpy as np


In [None]:
def normc_initializer(std: float = 1.0):
    """Build an initializer whose samples are rescaled along axis 0.

    The returned callable draws standard-normal values with NumPy and then
    scales them so that the L2 norm computed over the first axis (with the
    remaining axes kept) equals ``std``.

    Args:
        std: Target L2 norm along axis 0 of the generated array.

    Returns:
        A callable ``(shape, dtype=None)`` that returns the scaled samples
        wrapped in ``tf.constant``.
    """

    def _initializer(shape, dtype=None):
        # Pick the NumPy dtype: use ``dtype.name`` when the object exposes
        # one, otherwise the dtype itself if truthy, otherwise float32.
        if hasattr(dtype, "name"):
            np_dtype = dtype.name
        else:
            np_dtype = dtype or np.float32

        weights = np.random.randn(*shape).astype(np_dtype)
        # L2 norm over the first axis; keepdims so it broadcasts back.
        norms = np.sqrt(np.square(weights).sum(axis=0, keepdims=True))
        weights *= std / norms
        return tf.constant(weights)

    return _initializer

class DuelingModel(tf.keras.Model):
    """Convolutional dueling network built with the Keras functional API.

    Three ``Conv2D`` layers feed a ``Flatten`` layer, after which the network
    splits into a value stream and an advantage stream (one 512-unit hidden
    ``Dense`` layer each). The two heads are combined as
    ``V + (A - mean(A))``, where the mean is taken over axis 1 of the
    advantage head.

    Attributes:
        inputs: The ``tf.keras.layers.Input`` tensor of the wrapped model.
        q_out: Symbolic output tensor of shape ``(batch, num_outputs)``.
        base_model: Functional ``tf.keras.Model`` mapping ``inputs`` to
            ``q_out``; ``call`` delegates to it.
    """

    def __init__(self, input_shape, num_outputs):
        """Construct the layers and the wrapped functional model.

        Args:
            input_shape: Per-sample input shape (no batch axis), forwarded to
                ``tf.keras.layers.Input(shape=...)``. It must be accepted by
                ``Conv2D`` (presumably ``(height, width, channels)``; confirm
                against the caller).
            num_outputs: Number of units in the advantage head.
        """
        # `super().__init__()` already binds `self`; the original passed it a
        # second time as a stray positional argument.
        super().__init__()

        # NOTE(review): kept as `self.inputs` for backward compatibility with
        # code that reads this attribute.
        self.inputs = tf.keras.layers.Input(shape=input_shape,
                                            name="input_layer")

        # Convolutional trunk: (layer name, filters, kernel size, stride).
        conv_specs = (
            ("layer1", 32, 8, 4),
            ("layer2", 64, 4, 2),
            ("layer3", 64, 3, 1),
        )
        features = self.inputs
        for layer_name, filters, kernel_size, stride in conv_specs:
            features = tf.keras.layers.Conv2D(
                filters,
                kernel_size,
                strides=stride,
                name=layer_name,
                activation=tf.nn.relu,
                kernel_initializer=normc_initializer(1.0),
            )(features)
        flatten = tf.keras.layers.Flatten()(features)

        # Split into the two dueling streams.
        value_stream = tf.keras.layers.Dense(
            512,
            activation=tf.nn.relu,
            name="value_stream",
            kernel_initializer=normc_initializer(1.0),
        )(flatten)
        advantage_stream = tf.keras.layers.Dense(
            512,
            activation=tf.nn.relu,
            name="advantage_stream",
            kernel_initializer=normc_initializer(1.0),
        )(flatten)

        # Each head reads from its own hidden stream. The original wired both
        # heads straight to `flatten`, leaving the two streams above unused.
        value_out = tf.keras.layers.Dense(
            1,
            activation=None,
            name="value_out",
            kernel_initializer=normc_initializer(1.0),
        )(value_stream)
        advantage_out = tf.keras.layers.Dense(
            num_outputs,
            activation=None,
            name="advantage_out",
            kernel_initializer=normc_initializer(1.0),
        )(advantage_stream)

        # Subtract the mean advantage (over axis 1) so that the value and
        # advantage terms are identifiable.
        advantage_mean = tf.math.reduce_mean(advantage_out, axis=1,
                                             keepdims=True)
        self.q_out = value_out + (advantage_out - advantage_mean)

        # NOTE(review): an earlier comment here said the actual Q-values are
        # produced by a separate q-head; the tensor below is already the
        # aggregated V + (A - mean A). Confirm which is intended.
        self.base_model = tf.keras.Model(inputs=self.inputs,
                                         outputs=self.q_out)

    def call(self, inputs):
        """Run the wrapped functional model on ``inputs``.

        Args:
            inputs: Batch of inputs compatible with ``input_shape``.

        Returns:
            The output of ``self.base_model(inputs)``.
        """
        return self.base_model(inputs)
        