In [None]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.optimizers import Adam
import numpy as np

In [None]:
# Dueling Deep Q-Networks do not require computing the value of every state-action pair individually.
# Separate state-value and action-advantage streams let the estimate cover the state space more generally.
# This improves computation in scenarios where the ideal action is to do nothing, saving time over evaluating ALL state-action pairs.
class DuelingDeepQNetwork(keras.Model):
    """Dueling Q-network: a shared dense trunk feeding value and advantage heads.

    Two ReLU dense layers are shared by a single-unit value head V(s) and an
    ``n_actions``-unit advantage head A(s, a). ``call`` merges both heads into
    Q(s, a), so the model can be used wherever a plain Q-network output is
    expected.

    Args:
        fc1_dims: Number of units in the first hidden dense layer.
        fc2_dims: Number of units in the second hidden dense layer.
        n_actions: Number of units in the advantage head (one per action).
    """

    def __init__(self, fc1_dims, fc2_dims, n_actions):
        super().__init__()
        self.dense1 = keras.layers.Dense(fc1_dims, activation='relu')
        self.dense2 = keras.layers.Dense(fc2_dims, activation='relu')
        # Both heads are linear (no activation): raw V(s) and raw A(s, a).
        self.Value = keras.layers.Dense(1, activation=None)
        self.Advantage = keras.layers.Dense(n_actions, activation=None)

    def _shared_trunk(self, state):
        """Run ``state`` through the two hidden layers shared by both heads."""
        return self.dense2(self.dense1(state))

    def call(self, state):
        """Return the Q(s, a) estimate for every action.

        The heads are not simply summed: the per-state mean advantage is
        subtracted first, so that V and A cannot absorb arbitrary offsetting
        constants and remain recoverable from Q:

            Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))

        Args:
            state: Input tensor accepted by the first dense layer.

        Returns:
            Tensor whose last axis has one Q-value per action.
        """
        features = self._shared_trunk(state)
        value = self.Value(features)
        advantage = self.Advantage(features)

        # The advantage head's units sit on the last axis, so average there;
        # this matches axis=1 for the usual (batch, n_actions) output and
        # stays correct if the input carries extra leading dimensions.
        mean_advantage = tf.math.reduce_mean(advantage, axis=-1, keepdims=True)
        return value + (advantage - mean_advantage)

    def advantage(self, state):
        """Return the raw (un-centered) advantage head output for ``state``."""
        return self.Advantage(self._shared_trunk(state))

    # Backward-compatible alias for the original, misspelled method name.
    advntage = advantage