In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

In [12]:
# Build a custom dense layer by subclassing the Keras Layer class.
# Variables assigned as attributes are tracked by the Layer automatically.

class DenseLayer(layers.Layer):
    """Fully connected layer built from hand-made ``tf.Variable`` objects.

    Computes ``inputs @ w + b``. Both variables are created in the
    constructor from a random-normal initializer and stored as attributes.

    Args:
        units: Number of output features (columns of ``w``, length of ``b``).
        input_dim: Number of input features (rows of ``w``).
    """

    def __init__(self, units, input_dim):
        super().__init__()
        # Build both initializers first, then draw the kernel before the bias
        # so that ``w`` is created ahead of ``b``.
        kernel_init = tf.random_normal_initializer()
        bias_init = tf.random_normal_initializer()
        kernel_values = kernel_init(shape=(input_dim, units), dtype='float32')
        self.w = tf.Variable(initial_value=kernel_values, trainable=True)
        bias_values = bias_init(shape=(units,), dtype='float32')
        self.b = tf.Variable(initial_value=bias_values, trainable=True)

    def call(self, inputs):
        """Return the affine transform ``inputs @ w + b``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b
# class Linear(keras.layers.Layer):
#     def __init__(self, units=32, input_dim=32):
#         super(Linear, self).__init__()
#         w_init = tf.random_normal_initializer()
#         self.w = tf.Variable(
#             initial_value=w_init(shape=(input_dim, units), dtype="float32"),
#             trainable=True,
#         )
#         b_init = tf.zeros_initializer()
#         self.b = tf.Variable(
#             initial_value=b_init(shape=(units,), dtype="float32"), trainable=True
#         )

#     def call(self, inputs):
#         return tf.matmul(inputs, self.w) + self.b

In [14]:
# Run a (3, 3) batch of ones through a DenseLayer with 4 units.
x = tf.ones(shape=(3, 3))
linear_layer = DenseLayer(units=4, input_dim=3)
y = linear_layer(x)

y

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[0.09184986, 0.08719187, 0.00100254, 0.22106475],
       [0.09184986, 0.08719187, 0.00100254, 0.22106475],
       [0.09184986, 0.08719187, 0.00100254, 0.22106475]], dtype=float32)>

In [15]:
# Check that the layer tracks exactly w and b, in that order. Comparing by
# identity avoids list `==` falling back to tf.Variable's element-wise `==`
# (which yields a tensor, not a bool) whenever an entry is not the same object.
assert len(linear_layer.weights) == 2 and all(
    tracked is expected
    for tracked, expected in zip(linear_layer.weights, (linear_layer.w, linear_layer.b))
), "DenseLayer should track exactly [w, b], in that order"

In [16]:
# The approach above is verbose; add_weight() is a shortcut for creating the layer's parameters.

class Linear(layers.Layer):
    """Affine layer whose parameters are created through ``add_weight``.

    Computes ``inputs @ w + b``.

    Args:
        input_dim: Number of input features (rows of ``w``).
        units: Number of output features (columns of ``w``, length of ``b``).
    """

    def __init__(self, input_dim, units):
        super().__init__()
        # Kernel starts from random-normal values, bias from zeros.
        kernel_shape = (input_dim, units)
        self.w = self.add_weight(
            shape=kernel_shape, initializer='random_normal', trainable=True
        )
        bias_shape = (units,)
        self.b = self.add_weight(
            shape=bias_shape, initializer='zeros', trainable=True
        )

    def call(self, inputs):
        """Return the affine transform ``inputs @ w + b``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b



In [17]:
# Feed a (12, 3) batch of ones through a Linear layer with 20 output units.
x = tf.ones(shape=(12, 3))
la = Linear(input_dim=3, units=20)
y = la(x)

print(y)

tf.Tensor(
[[-0.03926901 -0.03419858 -0.01376631  0.12769729  0.15336572 -0.13010372
  -0.02338034 -0.01656438 -0.04477892  0.0151759  -0.08023205 -0.0214783
   0.17287059  0.0479687  -0.09878155 -0.10114719 -0.16871123  0.0141742
  -0.09235399  0.01231059]
 [-0.03926901 -0.03419858 -0.01376631  0.12769729  0.15336572 -0.13010372
  -0.02338034 -0.01656438 -0.04477892  0.0151759  -0.08023205 -0.0214783
   0.17287059  0.0479687  -0.09878155 -0.10114719 -0.16871123  0.0141742
  -0.09235399  0.01231059]
 [-0.03926901 -0.03419858 -0.01376631  0.12769729  0.15336572 -0.13010372
  -0.02338034 -0.01656438 -0.04477892  0.0151759  -0.08023205 -0.0214783
   0.17287059  0.0479687  -0.09878155 -0.10114719 -0.16871123  0.0141742
  -0.09235399  0.01231059]
 [-0.03926901 -0.03419858 -0.01376631  0.12769729  0.15336572 -0.13010372
  -0.02338034 -0.01656438 -0.04477892  0.0151759  -0.08023205 -0.0214783
   0.17287059  0.0479687  -0.09878155 -0.10114719 -0.16871123  0.0141742
  -0.09235399  0.01231059]
 

In [19]:
# A layer can also hold non-trainable parameters; backpropagation does not record gradients for them.


class ComputeSum(layers.Layer):
    """Layer that accumulates the axis-0 sum of its inputs in a variable.

    The accumulator ``total`` is created with ``trainable=False``.

    Args:
        input_dim: Passed to ``tf.ones`` as the shape of the accumulator,
            which therefore starts at one rather than zero.
    """

    def __init__(self, input_dim):
        super().__init__()
        starting_total = tf.ones(input_dim)
        self.total = tf.Variable(initial_value=starting_total, trainable=False)

    def call(self, inputs):
        """Add the axis-0 sum of ``inputs`` to ``total`` and return ``total``."""
        batch_sum = tf.reduce_sum(inputs, axis=0)
        # Reference for Variable.assign_add:
        # https://www.tensorflow.org/api_docs/python/tf/Variable#assign_add
        self.total.assign_add(batch_sum)
        return self.total



# The accumulator starts at ones and x is a (3, 6) batch of ones, so each
# call adds 3 to every one of the 6 slots.
x = tf.ones(shape=(3, 6))
cs = ComputeSum(input_dim=6)

y = cs(x)
print(y.numpy())        # first call: 1 + 3 = 4 in each slot

y = cs(x)
print(y.numpy())        # second call: 4 + 3 = 7 in each slot

[4. 4. 4. 4. 4. 4.]
[7. 7. 7. 7. 7. 7.]


In [20]:
# Inspect the layer's weight information.

# Counts, one per line: all weights, non-trainable weights, trainable weights.
print(
    len(cs.weights),
    len(cs.non_trainable_weights),
    len(cs.trainable_weights),
    sep="\n",
)

1
1
0


In [21]:
# Display the layer's trainable weights.
cs.trainable_weights

[]

In [22]:
# Display the layer's non-trainable variables.
cs.non_trainable_variables

[<tf.Variable 'Variable:0' shape=(6,) dtype=float32, numpy=array([7., 7., 7., 7., 7., 7.], dtype=float32)>]

In [23]:
# Display the layer's non-trainable weights.
cs.non_trainable_weights

[<tf.Variable 'Variable:0' shape=(6,) dtype=float32, numpy=array([7., 7., 7., 7., 7., 7.], dtype=float32)>]