In [None]:
import numpy as np
import tensorflow as tf

In [None]:
# Toy dataset: `size` scalar inputs drawn uniformly from [0, bound), each
# labelled with its integer part.
size, bound = 100, 10
x_train = np.random.uniform(low=0, high=bound, size=(size, 1))
y_train = np.floor(x_train[:, 0]).astype('int')

In [25]:
class ProgNet(tf.keras.layers.Layer):
    """Stack of Dense columns with lateral connections between columns.

    A column is a list of ``depth`` Dense layers. A ProgNet built from a
    ``prev_net`` reuses that net's column and adapter layer objects (by
    reference) and adds one new column on top of them. From the second layer
    onwards, column ``k`` also receives the previous-layer outputs of every
    column ``j < k`` through bias-free Dense adapters stored in ``self.U``.
    """

    def __init__(self, layers, U_activation=tf.keras.activations.relu, prev_net=None, name=None):
        """Create a network with one new column, optionally extending ``prev_net``.

        Args:
            layers: list of ``tf.keras.layers.Dense`` forming the new column.
            U_activation: activation applied to the sum of the lateral adapter
                outputs and the column's own layer output (columns after the
                first only).
            prev_net: optional ``ProgNet`` to extend; must have the same depth
                as ``layers``.
            name: name passed to the Keras ``Layer`` constructor.

        Raises:
            AssertionError: if an argument has the wrong type or the depths
                of ``layers`` and ``prev_net`` differ.
        """
        super(ProgNet, self).__init__(name=name)

        assert prev_net is None or isinstance(prev_net, ProgNet)
        assert isinstance(layers, list)
        assert all(isinstance(layer, tf.keras.layers.Dense) for layer in layers)

        self.U_activation = U_activation
        # Number of layers per column.
        self.depth = len(layers)

        if prev_net is None:
            # columns[k][i] is layer i of column k.
            self.columns = []
            # U[k][j][i] is the adapter feeding column j's layer i-1 output
            # into layer i of column k (U[0] is empty).
            self.U = []
            # Number of columns.
            self.width = 0
        else:
            assert len(layers) == prev_net.depth
            # Copy the outer lists so that the appends below do not modify
            # prev_net: sharing them would make a second network built from
            # the same prev_net pick up this network's column instead of its
            # own. The layer objects themselves are still shared.
            # TODO: copy weights from prev net and freeze the earlier columns
            # (not implemented: earlier columns stay trainable).
            self.columns = list(prev_net.columns)
            self.U = list(prev_net.U)
            self.width = prev_net.width

        self.columns.append(layers)
        self.width += 1
        # One list of adapters per earlier column. The adapter created for
        # layer 0 is never called, because layer 0 only sees the raw input.
        self.U.append([[tf.keras.layers.Dense(layer.units, use_bias=False,
                                              name='U_' + str(i) + '_' + str(j),
                                              trainable=True)
                        for i, layer in enumerate(layers)]
                       for j in range(self.width - 1)])

    def call_h(self, x, i, k, _cache=None):
        """Return the output of layer ``i`` in column ``k`` for input ``x``.

        Args:
            x: input tensor fed to layer 0 of every column.
            i: layer index, ``0 <= i < depth``.
            k: column index, ``0 <= k < width``.
            _cache: internal memo shared across the recursion so each
                (layer, column) output is built once per top-level call;
                callers should leave it as None.

        Raises:
            AssertionError: if ``i`` or ``k`` is out of range.
        """
        assert 0 <= i and i < self.depth
        assert 0 <= k and k < self.width

        if _cache is None:
            _cache = {}
        key = (i, k)
        if key in _cache:
            return _cache[key]

        if i == 0:
            out = self.columns[k][0](x)
        elif k == 0:
            # The first column has no lateral inputs.
            out = self.columns[k][i](self.call_h(x, i - 1, k, _cache))
        else:
            # Lateral terms first, then the column's own term: this keeps the
            # order in which layers are first called (and hence built).
            Uh = [self.U[k][j][i](self.call_h(x, i - 1, j, _cache)) for j in range(k)]
            # The Dense layer is called as-is, so its own activation (if any)
            # is applied before the sum; U_activation is applied after it.
            Wh = self.columns[k][i](self.call_h(x, i - 1, k, _cache))
            out = self.U_activation(tf.add(tf.add_n(Uh), Wh))

        _cache[key] = out
        return out

    def call(self, x):
        """Return the last layer's output of the newest column for ``x``."""
        return self.call_h(x, self.depth - 1, self.width - 1)

In [26]:
# Two structurally identical 16-32-4 ReLU columns. net1 holds the first
# column alone; net2 extends net1 with the second column.
layers1 = [tf.keras.layers.Dense(units, activation=tf.keras.activations.relu, trainable=True)
           for units in (16, 32, 4)]

layers2 = [tf.keras.layers.Dense(units, activation=tf.keras.activations.relu, trainable=True)
           for units in (16, 32, 4)]

net1 = ProgNet(layers1, name='net1')
net2 = ProgNet(layers2, prev_net=net1, name='net2')

In [27]:

# Graph inputs: a batch of scalar features and integer labels (the label
# placeholder is not used in the cells shown here).
x = tf.placeholder(tf.float32, [None, 1])
y = tf.placeholder(tf.int32, [None])

# Re-sample the inputs on the narrower range [0, bound); y_train is not
# regenerated here.
bound, size = 4, 100
x_train = np.random.uniform(low=0, high=bound, size=(size, 1))

# Forward pass of each network on the same placeholder.
expr1 = net1(x)
expr2 = net2(x)

# Trainable variables filtered by scope name.
train_vars1 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="net1")
train_vars2 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="net2")



In [28]:
# Open a session, initialise every variable, then print the output of each
# network for the sampled inputs.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
for _expr in (expr1, expr2):
    sess.run(tf.print(_expr), feed_dict={x: x_train})

[[0 0.00283957645 0.069959715 0.031892322]
 [0 0.00645952672 0.159145445 0.0725491196]
 [0 0.00812703371 0.200227559 0.0912771225]
 ...
 [0 0.0370026231 0.911643 0.415587962]
 [0 0.0107375905 0.264544457 0.120597057]
 [0 0.0237074643 0.584089339 0.266267121]]
[[0.0523964167 0.0831112713 0 0]
 [0.119192146 0.189062804 0 0]
 [0.149960726 0.237867817 0 0]
 ...
 [0.682776213 1.08302057 0 0]
 [0.198130906 0.314275563 0 0]
 [0.437454432 0.693890929 0 0]]


In [29]:
# Display the symbolic output tensor of net2 (name, shape and dtype).
expr2

<tf.Tensor 'net2_3/Relu_1:0' shape=(?, 4) dtype=float32>

In [30]:
# List trainable variables whose scope matches 'net2'. As the output shows,
# this also returns scopes such as 'net2_2' (presumably created by re-running
# earlier cells in the same graph).
tf.trainable_variables('net2')

[<tf.Variable 'net2/U_2_0/kernel:0' shape=(32, 4) dtype=float32>,
 <tf.Variable 'net2/U_1_0/kernel:0' shape=(16, 32) dtype=float32>,
 <tf.Variable 'net2/dense_3/kernel:0' shape=(1, 16) dtype=float32>,
 <tf.Variable 'net2/dense_3/bias:0' shape=(16,) dtype=float32>,
 <tf.Variable 'net2/dense_4/kernel:0' shape=(16, 32) dtype=float32>,
 <tf.Variable 'net2/dense_4/bias:0' shape=(32,) dtype=float32>,
 <tf.Variable 'net2/dense_5/kernel:0' shape=(32, 4) dtype=float32>,
 <tf.Variable 'net2/dense_5/bias:0' shape=(4,) dtype=float32>,
 <tf.Variable 'net2_2/U_2_0/kernel:0' shape=(32, 4) dtype=float32>,
 <tf.Variable 'net2_2/U_1_0/kernel:0' shape=(16, 32) dtype=float32>,
 <tf.Variable 'net2_2/dense_9/kernel:0' shape=(1, 16) dtype=float32>,
 <tf.Variable 'net2_2/dense_9/bias:0' shape=(16,) dtype=float32>,
 <tf.Variable 'net2_2/dense_10/kernel:0' shape=(16, 32) dtype=float32>,
 <tf.Variable 'net2_2/dense_10/bias:0' shape=(32,) dtype=float32>,
 <tf.Variable 'net2_2/dense_11/kernel:0' shape=(32, 4) dtyp

In [31]:
# List trainable variables whose scope matches 'net1'. As the output shows,
# this also returns scopes such as 'net1_2' and 'net1_3' (presumably created
# by re-running earlier cells in the same graph).
tf.trainable_variables('net1')

[<tf.Variable 'net1/dense/kernel:0' shape=(1, 16) dtype=float32>,
 <tf.Variable 'net1/dense/bias:0' shape=(16,) dtype=float32>,
 <tf.Variable 'net1/dense_1/kernel:0' shape=(16, 32) dtype=float32>,
 <tf.Variable 'net1/dense_1/bias:0' shape=(32,) dtype=float32>,
 <tf.Variable 'net1/dense_2/kernel:0' shape=(32, 4) dtype=float32>,
 <tf.Variable 'net1/dense_2/bias:0' shape=(4,) dtype=float32>,
 <tf.Variable 'net1_2/dense_6/kernel:0' shape=(1, 16) dtype=float32>,
 <tf.Variable 'net1_2/dense_6/bias:0' shape=(16,) dtype=float32>,
 <tf.Variable 'net1_2/dense_7/kernel:0' shape=(16, 32) dtype=float32>,
 <tf.Variable 'net1_2/dense_7/bias:0' shape=(32,) dtype=float32>,
 <tf.Variable 'net1_2/dense_8/kernel:0' shape=(32, 4) dtype=float32>,
 <tf.Variable 'net1_2/dense_8/bias:0' shape=(4,) dtype=float32>,
 <tf.Variable 'net1_3/dense_12/kernel:0' shape=(1, 16) dtype=float32>,
 <tf.Variable 'net1_3/dense_12/bias:0' shape=(16,) dtype=float32>,
 <tf.Variable 'net1_3/dense_13/kernel:0' shape=(16, 32) dtype=

# DQN with ProgNet

In [40]:
# @title Install necessary packages.
# NOTE(review): none of these installs is version-pinned, so a fresh run may
# pull different releases from the ones this notebook was executed with.
!pip install --upgrade --no-cache-dir dopamine-rl
!pip install cmake
!pip install atari_py
!pip install gin-config



Requirement already up-to-date: dopamine-rl in /home/psaint/anaconda3/envs/prog-nets-env/lib/python3.6/site-packages (2.0.5)
Collecting cmake
[?25l  Downloading https://files.pythonhosted.org/packages/ff/34/0a311fedffcc7a153bbc0390ef4c378dbc7f09f9865247137f82d62f8e7a/cmake-3.15.3-py3-none-manylinux2010_x86_64.whl (16.5MB)
[K     |████████████████████████████████| 16.5MB 118kB/s eta 0:00:01     |█████████████████████████       | 12.9MB 141kB/s eta 0:00:26     |██████████████████████████▊     | 13.8MB 143kB/s eta 0:00:20
[?25hInstalling collected packages: cmake
Successfully installed cmake-3.15.3


In [41]:
# @title Necessary imports and globals.

import os

import gin.tf
import numpy as np
from absl import flags
from dopamine.agents.dqn import dqn_agent
from dopamine.colab import utils as colab_utils
from dopamine.discrete_domains import atari_lib
from dopamine.discrete_domains import gym_lib
from dopamine.discrete_domains import run_experiment

# Root directory for this experiment's outputs; LOG_PATH is derived from it.
BASE_PATH = '/tmp/dqn_with_prognets'  # @param
# Environment name; used in the log path and formatted into the gin config.
GAME = 'Acrobot'  # @param





In [None]:
# @title Create a DQN-based agent that accepts a custom network.

# Log directory passed to the TrainRunner created at the end of this cell.
LOG_PATH = os.path.join(BASE_PATH, 'dqn', GAME)

class ProgNetAgent(dqn_agent.DQNAgent):
    """DQN agent that can be given a custom network.

    Every other DQNAgent constructor argument keeps its default value.
    """

    def __init__(self, sess, num_actions, net=None):
        """Build the agent.

        Args:
            sess: session forwarded to ``DQNAgent.__init__``.
            num_actions: number of actions, forwarded to ``DQNAgent.__init__``.
            net: optional network forwarded as DQNAgent's ``network``
                argument; when None, DQNAgent's default network is used.
        """
        if net is None:
            super(ProgNetAgent, self).__init__(sess, num_actions)
        else:
            # The network must go through the base constructor: assigning
            # ``self.network`` after DQNAgent.__init__ has returned cannot
            # affect a graph that was already built.
            # NOTE(review): assumes DQNAgent builds its networks inside
            # __init__ and accepts a ``network`` keyword - confirm against the
            # installed Dopamine version.
            super(ProgNetAgent, self).__init__(sess, num_actions, network=net)


def create_prognet_agent(sess, environment, summary_writer=None):
    """Agent factory: build a ProgNetAgent for ``environment``.

    Args:
        sess: session passed on to the agent.
        environment: object exposing ``action_space.n``, used as the number
            of actions.
        summary_writer: accepted but not used.

    Returns:
        A ``ProgNetAgent`` using DQNAgent's default network.
    """
    # TODO: construct the ProgNet-based network here and pass it as ``net=``;
    # the original cell left this step unfinished.
    return ProgNetAgent(sess, num_actions=environment.action_space.n)

# Gin bindings for the environment and a deliberately short run.
# NOTE(review): GAME is 'Acrobot' in this notebook, but the bindings below
# configure atari_lib.create_atari_environment. Acrobot looks like a Gym
# classic-control task, so a Gym environment factory is probably needed
# instead - confirm before running.
random_dqn_config = """
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
atari_lib.create_atari_environment.game_name = '{}'
atari_lib.create_atari_environment.sticky_actions = True
run_experiment.Runner.num_iterations = 200
run_experiment.Runner.training_steps = 10
run_experiment.Runner.max_steps_per_episode = 100
""".format(GAME)
gin.parse_config(random_dqn_config, skip_unknown=False)

# Create the runner class with this agent. We use very small numbers of steps
# to terminate quickly, as this is mostly meant for demonstrating how one can
# use the framework.
# The factory passed here is the one defined in this cell; the previous name,
# create_random_dqn_agent, is not defined anywhere in the notebook.
random_dqn_runner = run_experiment.TrainRunner(LOG_PATH, create_prognet_agent)

In [36]:
# gym_lib-style networks for the CartPole/Acrobot tasks

class CartPoleDQNNetwork(tf.keras.Model):
    """Keras DQN network for Cartpole that delegates to a wrapped network."""

    def __init__(self, num_actions, net):
        """Wraps ``net`` as the network used to compute the agent's Q-values.

        Args:
          num_actions: int, number of actions. Not used by this class; the
            output size is whatever ``net`` produces.
          net: callable network (e.g. a Keras layer) applied to the state;
            its ``name`` attribute is reused as this model's name.
        """
        super(CartPoleDQNNetwork, self).__init__(name=net.name)
        self.net = net

    def call(self, state):
        """Creates the output tensor/op given the state tensor as input.

        The state is passed to the wrapped network unchanged (no rescaling or
        casting is done here) and the result is wrapped in
        ``atari_lib.DQNNetworkType``.
        """
        # NOTE(review): requires ``atari_lib`` to be imported and to provide
        # DQNNetworkType - confirm for the installed Dopamine version.
        x = self.net(state)
        return atari_lib.DQNNetworkType(x)

class CarpoleProgNet(tf.keras.Model):
    def __init__

In [38]:
# Show the CARTPOLE_MAX_VALS constant exported by gym_lib (a 4-element array,
# see the output below).
gym_lib.CARTPOLE_MAX_VALS

array([2.4       , 5.        , 0.26179939, 6.28318531])

In [None]:
# NOTE(review): num_actions is not defined in any cell shown in this notebook.
# NOTE(review): ProgNet asserts that every layer is a Dense, so this list
# (which starts with a Flatten) would be rejected if passed to ProgNet as-is.
layers = [tf.keras.layers.Flatten(),
          tf.keras.layers.Dense(num_actions,activation= tf.keras.activations.relu)]
cartpole_net = 