In [1]:
import tensorflow as tf

# Layers and models

Most models are made of layers. Layers are functions with a known mathematical structure that can be reused and have trainable variables. In TensorFlow, most high-level implementations of layers and models, such as Keras or Sonnet, are built on the same foundational class: tf.Module.

In [7]:
class SimpleModule(tf.Module):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.a_variable = tf.Variable(5., name='train_me')
        self.non_trainable_variable = tf.Variable(5., trainable=False,
                                                   name='do_not_train_me')
    def __call__(self, x):
        return self.a_variable * x + self.non_trainable_variable

In [8]:
simple_module = SimpleModule(name='simple')

In [9]:
simple_module(tf.constant(5.))

<tf.Tensor: shape=(), dtype=float32, numpy=30.0>

### Example of a two-layer linear layer model made out of modules.

First a dense (linear) layer:

In [16]:
class Dense(tf.Module):
    def __init__(self, in_features, out_features, name=None):
        super().__init__(name=name)
        self.w = tf.Variable(tf.random.normal([in_features, out_features]), name='w')
        self.b = tf.Variable(tf.zeros([out_features]), name='b')
    def __call__(self, x):
        y = tf.matmul(x, self.w) + self.b
        return tf.nn.relu(y) 

In [17]:
class SequentialModule(tf.Module):
    def __init__(self, name=None):
        super().__init__(name=name)
        
        self.dense_1 = Dense(in_features=3, out_features=3)
        self.dense_2 = Dense(in_features=3, out_features=2)
        
    def __call__(self, x):
        return self.dense_2(self.dense_1(x))
        

In [18]:
my_model = SequentialModule(name='my_module')
print('Model results: ', my_model(tf.constant([ [2., 2., 2.] ])))
print('Submodules: ', my_model.submodules)
for var in my_model.variables:
    print(var)

Model results:  tf.Tensor([[0.       8.417499]], shape=(1, 2), dtype=float32)
Submodules:  (<__main__.Dense object at 0x000001DAC10AC908>, <__main__.Dense object at 0x000001DAC10AC8C8>)
<tf.Variable 'b:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>
<tf.Variable 'w:0' shape=(3, 3) dtype=float32, numpy=
array([[ 0.5442032 , -1.6187414 ,  1.4587874 ],
       [-0.20054685, -0.82662535, -1.0183262 ],
       [ 2.010697  ,  1.5654202 , -1.0852054 ]], dtype=float32)>
<tf.Variable 'b:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>
<tf.Variable 'w:0' shape=(3, 2) dtype=float32, numpy=
array([[-0.04522573,  1.7876456 ],
       [ 0.6071877 , -1.6608255 ],
       [-0.50740117,  0.91678923]], dtype=float32)>


### Example of a two-layer linear layer model made out of modules with unknown input dimension.

In [19]:
class FlexibleDenseModule(tf.Module):
    def __init__(self, out_features, name=None):
        super().__init__(name=name)
        self.is_built = False
        self.out_features = out_features
        
    def __call__(self, x):
        if self.is_built != True:
            self.w = tf.Variable(tf.random.normal([x.shape[-1], self.out_features]), name='w')
            self.b = tf.Variable(tf.zeros([self.out_features]), name='b')
            self.is_built = True
        
        y = tf.matmul(x, self.w) + self.b
        return tf.nn.relu(y)
    

In [20]:
class MyFlexibleSequentialModule(tf.Module):
    def __init__(self, name=None):
        super().__init__(name=name)

        self.dense_1 = FlexibleDenseModule(out_features=3)
        self.dense_2 = FlexibleDenseModule(out_features=2)

    def __call__(self, x):
        x = self.dense_1(x)
        return self.dense_2(x)

In [21]:
my_model = MyFlexibleSequentialModule(name="the_model")
print("Flexible Model results:", my_model(tf.constant([[2.0, 2.0, 2.0]])))

Flexible Model results: tf.Tensor([[6.5807467 0.       ]], shape=(1, 2), dtype=float32)


### Saving weights
You can save a tf.Module as both a checkpoint and a SavedModel.

Checkpoints are just the weights (that is, the values of the set of variables inside the module and its submodules):

In [None]:
chkp_path = "my_checkpoint"
checkpoint = tf.train.Checkpoint(model=my_model)
checkpoint.write(chkp_path)

You can look inside a checkpoint to be sure the whole collection of variables is saved, sorted by the Python object that contains them.

In [None]:
tf.train.list_variables(chkp_path)

When you load models back in, you overwrite the values in your Python object.

In [None]:
new_model = SequentialModule()
new_checkpoint = tf.train.Checkpoint(model=new_model)
new_checkpoint.restore("my_checkpoint")

# Should be the same result as above
new_model(tf.constant([[2.0, 2.0, 2.0]]))

### Saving functions

In [22]:
class MySequentialModule(tf.Module):
    def __init__(self, name=None):
        super().__init__(name=name)

        self.dense_1 = Dense(in_features=3, out_features=3)
        self.dense_2 = Dense(in_features=3, out_features=2)

    @tf.function
    def __call__(self, x):
        x = self.dense_1(x)
        return self.dense_2(x)

# You have made a model with a graph!
my_model = MySequentialModule(name="the_model")

In [None]:
tf.saved_model.save(my_model, "the_saved_model")
new_model = tf.saved_model.load("the_saved_model")

### Keras models and layers
tf.keras.layers.Layer is the base class of all Keras layers, and it inherits from tf.Module.

You can convert a module into a Keras layer just by swapping out the parent and then changing __call__ to call:

In [23]:
class FlexibleDense(tf.keras.layers.Layer):
    def __init__(self, out_features, **kwargs):
        super().__init__(**kwargs)
        self.out_features = out_features

    # Create the state of the layer (weights)
    def build(self, input_shape):
        self.w = tf.Variable(tf.random.normal( [input_shape[-1], self.out_features] ), name='w')
        self.b = tf.Variable(tf.zeros([self.out_features]), name='b')

    def call(self, inputs):
        y = tf.matmul(inputs, self.w) + self.b
        return y

Create the instance of the layer

In [24]:
flexible_dense = FlexibleDense(out_features=3)
print('flexible_dense variables before build: ', flexible_dense.variables)

flexible_dense variables before build:  []


Call it, with predictably random results

In [25]:
print("Model results:", flexible_dense(tf.constant([[2.0, 2.0, 2.0], [3.0, 3.0, 3.0]])))

Model results: tf.Tensor(
[[2.6415823 2.0614662 3.2402472]
 [3.9623735 3.0921993 4.8603706]], shape=(2, 3), dtype=float32)


In [26]:
flexible_dense.variables

[<tf.Variable 'flexible_dense/w:0' shape=(3, 3) dtype=float32, numpy=
 array([[ 0.98374176,  0.120131  , -0.31949654],
        [ 0.7948522 ,  0.55656505,  0.8438763 ],
        [-0.45780286,  0.35403702,  1.0957439 ]], dtype=float32)>,
 <tf.Variable 'flexible_dense/b:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>]

Since build is only called once, inputs will be rejected if the input shape is not compatible with the layer's variables:

In [27]:
try:
  print("Model results:", flexible_dense(tf.constant([[2.0, 2.0, 2.0, 2.0]])))
except tf.errors.InvalidArgumentError as e:
  print("Failed:", e)

Failed: In[0] mismatch In[1] shape: 4 vs. 3: [1,4] [3,3] 0 0 [Op:MatMul]


### Keras models

You can define the SequentialModule from above with nearly identical code, again converting __call__ to call() and changing the parent:

In [28]:
class MySequentialModel(tf.keras.Model):
    def __init__(self, name=None, **kwargs):
        super().__init__(**kwargs)

        self.dense_1 = FlexibleDense(out_features=3)
        self.dense_2 = FlexibleDense(out_features=2)
  
    def call(self, x):
        x = self.dense_1(x)
        return self.dense_2(x)

In [29]:
my_sequential_model = MySequentialModel(name="the_model")
print("Model results:", my_sequential_model(tf.constant([[2.0, 2.0, 2.0]])))

Model results: tf.Tensor([[-1.5817571  4.0405645]], shape=(1, 2), dtype=float32)


In [30]:
my_sequential_model.variables

[<tf.Variable 'my_sequential_model/flexible_dense_1/w:0' shape=(3, 3) dtype=float32, numpy=
 array([[ 0.3316771 ,  2.7431345 , -1.8457296 ],
        [ 0.64744586,  0.10166351, -0.19677602],
        [ 0.4126471 , -2.0158067 , -1.9062277 ]], dtype=float32)>,
 <tf.Variable 'my_sequential_model/flexible_dense_1/b:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>,
 <tf.Variable 'my_sequential_model/flexible_dense_2/w:0' shape=(3, 2) dtype=float32, numpy=
 array([[0.65136623, 1.0350678 ],
        [0.7125623 , 1.4817948 ],
        [0.5794614 , 0.16427782]], dtype=float32)>,
 <tf.Variable 'my_sequential_model/flexible_dense_2/b:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>]

In [31]:
my_sequential_model.submodules

(<__main__.FlexibleDense at 0x1dac2adad08>,
 <__main__.FlexibleDense at 0x1dac1afe1c8>)

Overriding tf.keras.Model is a very Pythonic approach to building TensorFlow models. If you are migrating models from other frameworks, this can be very straightforward.

If you are constructing models that are simple assemblages of existing layers and inputs, you can save time and space by using the functional API, which comes with additional features around model reconstruction and architecture.

Here is the same model with the functional API:

In [32]:
inputs = tf.keras.Input(shape=[3,])

x = FlexibleDense(3)(inputs)
x = FlexibleDense(2)(x)

my_functional_model = tf.keras.Model(inputs=inputs, outputs=x)

my_functional_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 3)]               0         
_________________________________________________________________
flexible_dense_3 (FlexibleDe (None, 3)                 12        
_________________________________________________________________
flexible_dense_4 (FlexibleDe (None, 2)                 8         
Total params: 20
Trainable params: 20
Non-trainable params: 0
_________________________________________________________________


In [33]:
my_functional_model(tf.constant([[2.0, 2.0, 2.0]]))

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[-0.03417134, -4.5919423 ]], dtype=float32)>

### Saving Keras models

In [None]:
my_sequential_model.save("exname_of_file")
reconstructed_model = tf.keras.models.load_model("exname_of_file")