In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

In [2]:
# Build a custom dense layer by subclassing the Keras Layer base class.
# Variables assigned as attributes are tracked automatically by the Layer.

class DenseLayer(layers.Layer):
    """Fully connected layer computing ``inputs @ w + b``.

    Both the kernel ``w`` and the bias ``b`` are created eagerly in
    ``__init__`` as trainable ``tf.Variable`` objects drawn from a
    random-normal initializer, so the input width must be known up front.

    Args:
        units: Number of output features (columns of ``w``, length of ``b``).
        input_dim: Number of input features (rows of ``w``).
    """

    def __init__(self, units, input_dim):
        super().__init__()
        kernel_shape = (input_dim, units)
        bias_shape = (units,)
        # Two separate initializer instances, one per variable.
        kernel_init = tf.random_normal_initializer()
        bias_init = tf.random_normal_initializer()
        self.w = tf.Variable(
            kernel_init(shape=kernel_shape, dtype="float32"), trainable=True
        )
        self.b = tf.Variable(
            bias_init(shape=bias_shape, dtype="float32"), trainable=True
        )

    def call(self, inputs):
        """Apply the affine map to ``inputs`` and return the result."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b
# class Linear(keras.layers.Layer):
#     def __init__(self, units=32, input_dim=32):
#         super(Linear, self).__init__()
#         w_init = tf.random_normal_initializer()
#         self.w = tf.Variable(
#             initial_value=w_init(shape=(input_dim, units), dtype="float32"),
#             trainable=True,
#         )
#         b_init = tf.zeros_initializer()
#         self.b = tf.Variable(
#             initial_value=b_init(shape=(units,), dtype="float32"), trainable=True
#         )

#     def call(self, inputs):
#         return tf.matmul(inputs, self.w) + self.b

In [3]:
# Push an all-ones batch of shape (3, 3) through a 4-unit DenseLayer.
x = tf.ones(shape=(3, 3))
linear_layer = DenseLayer(units=4, input_dim=3)
# linear_layer = Linear(4, 3)
y = linear_layer(x)

# Last expression of the cell: display the layer output.
y

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[ 0.1382047 , -0.0191696 ,  0.19688514, -0.01656014],
       [ 0.1382047 , -0.0191696 ,  0.19688514, -0.01656014],
       [ 0.1382047 , -0.0191696 ,  0.19688514, -0.01656014]],
      dtype=float32)>

In [4]:
# The layer must track exactly the two variables assigned in __init__, kernel
# first and bias second. Check by identity: `==` between tf.Variable objects is
# an element-wise op, so a plain list comparison only succeeds through Python's
# per-element identity shortcut and would raise an ambiguous-truth-value error
# (not an AssertionError) if the tracked objects ever differed.
assert len(linear_layer.weights) == 2, "expected exactly two tracked weights"
assert linear_layer.weights[0] is linear_layer.w, "first tracked weight should be w"
assert linear_layer.weights[1] is linear_layer.b, "second tracked weight should be b"

In [5]:
# Creating tf.Variable objects by hand is verbose; add_weight() is a shortcut
# for creating the layer's parameters.

class Linear(layers.Layer):
    """Affine layer ``inputs @ w + b`` whose weights come from ``add_weight``.

    Args:
        input_dim: Number of input features (rows of ``w``).
        units: Number of output features (columns of ``w``, length of ``b``).
    """

    def __init__(self, input_dim, units):
        super().__init__()
        kernel_shape = (input_dim, units)
        bias_shape = (units,)
        # Kernel is random-normal, bias starts at zero; both are trainable.
        self.w = self.add_weight(
            shape=kernel_shape, initializer="random_normal", trainable=True
        )
        self.b = self.add_weight(
            shape=bias_shape, initializer="zeros", trainable=True
        )

    def call(self, inputs):
        """Apply the affine map to ``inputs`` and return the result."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b



In [6]:
# Push an all-ones batch of shape (12, 3) through a 20-unit Linear layer.
x = tf.ones(shape=(12, 3))
la = Linear(input_dim=3, units=20)
y = la(x)

print(y)

tf.Tensor(
[[-0.01628886  0.0107303  -0.15734124  0.02500769  0.02952196  0.02679781
   0.05750535  0.01565486 -0.00667565 -0.03137799  0.03977148  0.1601704
   0.04061134 -0.08541013  0.06478705 -0.13211325  0.05892086  0.02591803
   0.04086535  0.01551829]
 [-0.01628886  0.0107303  -0.15734124  0.02500769  0.02952196  0.02679781
   0.05750535  0.01565486 -0.00667565 -0.03137799  0.03977148  0.1601704
   0.04061134 -0.08541013  0.06478705 -0.13211325  0.05892086  0.02591803
   0.04086535  0.01551829]
 [-0.01628886  0.0107303  -0.15734124  0.02500769  0.02952196  0.02679781
   0.05750535  0.01565486 -0.00667565 -0.03137799  0.03977148  0.1601704
   0.04061134 -0.08541013  0.06478705 -0.13211325  0.05892086  0.02591803
   0.04086535  0.01551829]
 [-0.01628886  0.0107303  -0.15734124  0.02500769  0.02952196  0.02679781
   0.05750535  0.01565486 -0.00667565 -0.03137799  0.03977148  0.1601704
   0.04061134 -0.08541013  0.06478705 -0.13211325  0.05892086  0.02591803
   0.04086535  0.0155182

In [7]:
# A layer can also hold non-trainable parameters; backpropagation does not
# record gradients for them.


class ComputeSum(layers.Layer):
    """Layer that keeps a running per-column total across calls.

    The accumulator ``total`` is a non-trainable variable that starts as
    all ones (not zeros), so the first call returns ``1 + column sums``.

    Args:
        input_dim: Shape argument handed to ``tf.ones`` for the accumulator.
    """

    def __init__(self, input_dim):
        super().__init__()
        starting_total = tf.ones(input_dim)
        self.total = tf.Variable(starting_total, trainable=False)

    def call(self, inputs):
        """Add the axis-0 sums of ``inputs`` to ``total`` and return ``total``."""
        column_sums = tf.reduce_sum(inputs, axis=0)
        # In-place update of the variable, see
        # https://www.tensorflow.org/api_docs/python/tf/Variable#assign_add
        self.total.assign_add(column_sums)
        return self.total



# Three all-ones rows with six columns: every call adds 3 to each column total.
x = tf.ones(shape=(3, 6))
cs = ComputeSum(input_dim=6)

y = cs(x)
print(y.numpy())  # initial 1 + 3 -> 4 in each of the 6 slots

y = cs(x)
print(y.numpy())  # previous 4 + 3 -> 7 in each of the 6 slots

[4. 4. 4. 4. 4. 4.]
[7. 7. 7. 7. 7. 7.]


In [8]:
# Inspect the layer's weight bookkeeping: the number of all weights, then
# non-trainable weights, then trainable weights (one count per line).
print(
    len(cs.weights),
    len(cs.non_trainable_weights),
    len(cs.trainable_weights),
    sep="\n",
)

1
1
0


In [9]:
# Display the trainable weights of `cs`; ComputeSum creates its only variable
# with trainable=False, so this list is empty.
cs.trainable_weights

[]

In [10]:
# Display the non-trainable variables of `cs`: the running `total` accumulator.
cs.non_trainable_variables

[<tf.Variable 'Variable:0' shape=(6,) dtype=float32, numpy=array([7., 7., 7., 7., 7., 7.], dtype=float32)>]

In [11]:
# Display the non-trainable weights of `cs`: the running `total` accumulator.
cs.non_trainable_weights

[<tf.Variable 'Variable:0' shape=(6,) dtype=float32, numpy=array([7., 7., 7., 7., 7., 7.], dtype=float32)>]

### 最好的應用狀況是: 當不知道input_shape時, layer參數不好設定
- 透過keras api 的 build function去實現!
- 注意: The `__call__()` method of your layer will automatically run `build()` the first time it is called. You now have a layer that's lazy and thus easier to use:


In [14]:
class Linear(layers.Layer):
    """Affine layer ``x @ w + b`` whose weights are created lazily in ``build``.

    Only the output width is given at construction time; the kernel's input
    dimension is read from the last axis of the first input's shape.

    Args:
        units: Number of output features (columns of ``w``, length of ``b``).
        **kwargs: Standard base-``Layer`` keyword arguments (for example
            ``name`` or ``dtype``), forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, units, **kwargs):
        # Forward base-class options so callers can name or type the layer.
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create ``w`` and ``b`` once the input shape is known.

        Args:
            input_shape: Shape of the layer input; only its last entry
                (the feature dimension) is used.
        """
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='random_normal',
            trainable=True
        )
        self.b = self.add_weight(
            shape=(self.units, ),
            initializer='random_normal',
            trainable=True
        )

    def call(self, x):
        """Apply the affine map to ``x`` and return the result."""
        return tf.matmul(x, self.w) + self.b

    def get_config(self):
        """Return the layer config, including ``units``, for serialization.

        Without this override the constructor argument ``units`` is not part
        of the config, so the layer could not be re-created from it.
        """
        config = super().get_config()
        config.update({"units": self.units})
        return config

In [15]:
# The input shape is not known when the layer is constructed.
linear = Linear(units=32)

# Dummy data: 16000 rows with 128 features each.
x = tf.ones(shape=(16000, 128))

# The first call creates the layer's parameters from the input shape.
y = linear(x)

print(y.shape)

# Drop the large tensors; they are not needed afterwards.
del x
del y

(16000, 32)


In [16]:
# Layers are recursively composable
# If you assign a Layer instance as attribute of another Layer, the outer layer will start tracking the weights of the inner layer.
# 很方便, 會自動追蹤，才可以巢狀建立layer
# We recommend creating such sublayers in the __init__() method (since the sublayers will typically have a build method, they will be built when the outer layer gets built).
# 會自動建立, 實際上也是有build的!

# Let's assume we are reusing the Linear class
# with a `build` method that we defined above.


class MLPBlock(keras.layers.Layer):
    """Stack of three ``Linear`` sublayers (32, 32 and 1 units) with ReLU between.

    The sublayers are stored as attributes in ``__init__`` and are applied
    in order by ``call``.
    """

    def __init__(self):
        super().__init__()
        self.linear_1 = Linear(32)
        self.linear_2 = Linear(32)
        self.linear_3 = Linear(1)

    def call(self, inputs):
        """Run ``inputs`` through the two hidden layers and the 1-unit output layer."""
        hidden = tf.nn.relu(self.linear_1(inputs))
        hidden = tf.nn.relu(self.linear_2(hidden))
        return self.linear_3(hidden)


mlp = MLPBlock()
# The first call to `mlp` creates the weights of every sublayer.
y = mlp(tf.ones((3, 64)))
# Each of the three Linear sublayers contributes a kernel and a bias.
print("weights:", len(mlp.weights))
print("trainable weights:", len(mlp.trainable_weights))

weights: 6
trainable weights: 6
