In [1]:
# -*- coding:utf-8 -*-
# !/usr/bin/python
# Author: Selvaria

# 模型构建和设计

import tensorflow as tf
import numpy as np
print(tf.__version__)

  from ._conv import register_converters as _register_converters


2.0.0


In [5]:
# Sample a (2, 20) float32 tensor from tf.random.uniform and display it.
X = tf.random.uniform((2,20)) 
X

<tf.Tensor: id=49, shape=(2, 20), dtype=float32, numpy=
array([[3.6156774e-02, 1.4954937e-01, 3.4812140e-01, 4.6542645e-01,
        2.2801602e-01, 2.5271642e-01, 1.3856304e-01, 7.2027147e-01,
        9.6304202e-01, 5.4294157e-01, 2.2437203e-01, 7.1472394e-01,
        3.9470804e-01, 9.1624260e-04, 3.8572931e-01, 9.5799685e-01,
        1.2486267e-01, 2.7660322e-01, 7.2270083e-01, 3.4564829e-01],
       [7.9393077e-01, 5.1483917e-01, 5.7949400e-01, 3.1905949e-01,
        6.4718223e-01, 2.4003196e-01, 7.6118279e-01, 2.1996057e-01,
        7.2862136e-01, 9.1994321e-01, 5.1285851e-01, 1.5239441e-01,
        6.2276721e-01, 5.4478800e-01, 4.5891428e-01, 8.5810685e-01,
        2.8366303e-01, 5.6518555e-01, 8.6875117e-01, 5.0242186e-02]],
      dtype=float32)>

In [2]:
#自定义MLP模型结构

class MLP(tf.keras.Model):
    """Multilayer perceptron: Flatten -> Dense(256, ReLU) -> Dense(10)."""

    def __init__(self):
        super().__init__()
        # Flatten collapses every dimension except the first (batch) one.
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(units=256, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        """Forward pass: return the output of the final 10-unit Dense layer."""
        hidden = self.dense1(self.flatten(inputs))
        return self.dense2(hidden)
    
# Draw a fresh (2, 20) random input, instantiate the model and run one forward pass.
X = tf.random.uniform((2,20)) 
net = MLP()
net(X)

<tf.Tensor: id=62, shape=(2, 10), dtype=float32, numpy=
array([[ 0.14354907,  0.05338682, -0.40425608, -0.2587788 , -0.08983592,
         0.24334475,  0.08793818, -0.5242768 , -0.06720618,  0.17471312],
       [-0.01480664,  0.16989782, -0.4945268 , -0.17466378,  0.14202915,
        -0.02115927, -0.0175516 , -0.611707  , -0.027931  ,  0.10718849]],
      dtype=float32)>

In [3]:
# The same architecture with the stock Keras Sequential container. Layers are
# appended with `add` here; passing them as a list to the constructor also works.

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(256, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(10))

model(X)

<tf.Tensor: id=117, shape=(2, 10), dtype=float32, numpy=
array([[-0.0521878 ,  0.0668958 , -0.06255163,  0.03062708, -0.14593492,
         0.10147388, -0.04072712, -0.11214042, -0.02359716, -0.13605078],
       [-0.2278136 , -0.04991286,  0.08795225,  0.17137462, -0.02623325,
         0.07074243, -0.05856334, -0.07146536,  0.12810664, -0.07756173]],
      dtype=float32)>

In [4]:
# 更灵活一些

class FancyMLP(tf.keras.Model):
    """Model whose forward pass combines a fixed random matrix with Python control flow.

    The forward pass multiplies the flattened input by ``rand_weight``, applies a
    Dense layer, rescales the activations based on their norm and returns their sum.
    """

    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        # 20x20 matrix sampled once at construction and stored as a constant tensor
        # (it is not created through a layer or add_weight).
        self.rand_weight = tf.constant(tf.random.uniform((20,20)))
        self.dense = tf.keras.layers.Dense(units=20, activation=tf.nn.relu)

    def call(self, inputs):
        """Return the scalar sum of the rescaled hidden activations."""
        flat = self.flatten(inputs)
        projected = tf.nn.relu(tf.matmul(flat, self.rand_weight) + 1)
        hidden = self.dense(projected)
        # Halve the activations until their norm is at most 1.
        while tf.norm(hidden) > 1:
            hidden = hidden / 2
        # Scale up once if the norm ended below 0.8.
        if tf.norm(hidden) < 0.8:
            hidden = hidden * 10
        return tf.reduce_sum(hidden)

In [5]:
# 定义后可直接通过add函数调用

class NestMLP(tf.keras.Model):
    """Model that nests a Sequential sub-network and follows it with one Dense layer."""

    def __init__(self):
        super().__init__()
        # Inner stack: Flatten -> Dense(64, ReLU) -> Dense(32, ReLU).
        self.net = tf.keras.Sequential([
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation=tf.nn.relu),
            tf.keras.layers.Dense(32, activation=tf.nn.relu),
        ])
        self.dense = tf.keras.layers.Dense(units=16, activation=tf.nn.relu)

    def call(self, inputs):
        """Run the inner stack, then the final 16-unit ReLU layer."""
        features = self.net(inputs)
        return self.dense(features)

# Chain the custom models and a built-in layer inside one Sequential container.
net = tf.keras.Sequential()
net.add(NestMLP())
# 20 output units so the result can be multiplied by FancyMLP's (20, 20) rand_weight.
net.add(tf.keras.layers.Dense(20))
net.add(FancyMLP())

# FancyMLP.call ends in tf.reduce_sum, so the result is a scalar tensor.
net(X)

<tf.Tensor: id=311, shape=(), dtype=float32, numpy=23.746525>

In [3]:
# Model weights: assemble a small MLP and call it once on a random (2, 20) input.

net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10),
])

X = tf.random.uniform((2, 20))
Y = net(X)
Y

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[-0.02536002,  0.21001825,  0.2942347 ,  0.37226382, -0.27594352,
        -0.1072491 , -0.01887954, -0.16058698, -0.0123478 ,  0.0620673 ],
       [-0.00090306,  0.30016986,  0.2668451 ,  0.20013687, -0.45570928,
        -0.09548484, -0.31233752, -0.21917205,  0.00874135,  0.05590382]],
      dtype=float32)>

In [4]:
# Show the first entry of the model's weight list together with its Python type.
net.weights[0], type(net.weights[0])

(<tf.Variable 'sequential/dense/kernel:0' shape=(20, 256) dtype=float32, numpy=
 array([[ 0.04821877,  0.01971839, -0.1322133 , ..., -0.02457005,
         -0.0513386 , -0.04596808],
        [-0.08145669,  0.10399023,  0.00986847, ...,  0.09806436,
         -0.08687238,  0.09704998],
        [ 0.00680166,  0.14200485,  0.11452475, ..., -0.00169985,
          0.03269209,  0.07975852],
        ...,
        [ 0.14109606,  0.10782537, -0.04107142, ...,  0.08147974,
          0.08208396,  0.02260047],
        [-0.08611073,  0.12286136,  0.01139548, ...,  0.13120255,
          0.10408655,  0.0368717 ],
        [ 0.09235047, -0.03619536, -0.146383  , ..., -0.13490865,
          0.09247346, -0.03999708]], dtype=float32)>,
 tensorflow.python.ops.resource_variable_ops.ResourceVariable)

In [5]:
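# Initialization: the reference recipe draws the weights from a normal distribution
# (mean 0, stddev 0.01) and zeroes the biases. The code below uses constant
# initializers instead (zeros for d1, ones for d2); the normal initializer is only
# mentioned in a comment.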

class Linear(tf.keras.Model):
    """Two stacked Dense layers without activation, using constant initializers.

    ``d1`` (10 units) starts with all-zero kernel and bias; ``d2`` (1 unit) starts
    with all-one kernel and bias.
    """

    def __init__(self):
        super().__init__()
        # Alternative kernel initializer for d1:
        # tf.random_normal_initializer(mean=0.0, stddev=0.01)
        self.d1 = tf.keras.layers.Dense(
            10,
            kernel_initializer=tf.zeros_initializer(),
            bias_initializer=tf.zeros_initializer(),
        )
        self.d2 = tf.keras.layers.Dense(
            1,
            kernel_initializer=tf.ones_initializer(),
            bias_initializer=tf.ones_initializer(),
        )

    def call(self, input):
        """Apply d1 and then d2 to ``input`` and return the result."""
        return self.d2(self.d1(input))

# Instantiate the model, call it once on X, then list its weights as NumPy arrays.
net = Linear()
net(X)
net.get_weights()

[array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

In [7]:
# Display the model's weights via the `weights` attribute (tf.Variable objects).
net.weights

[<tf.Variable 'linear/dense_2/kernel:0' shape=(20, 10) dtype=float32, numpy=
 array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0.

In [9]:
# 自定义初始化，感觉没啥区别

def my_init():
    """Return a new ``tf.keras.initializers.Ones`` initializer instance."""
    ones_initializer = tf.keras.initializers.Ones()
    return ones_initializer

# One Dense layer whose kernel initializer comes from my_init().
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, kernel_initializer=my_init()),
])

# Call the model once on X, then display its weights.
Y = model(X)
model.weights

[<tf.Variable 'sequential_2/dense_5/kernel:0' shape=(20, 64) dtype=float32, numpy=
 array([[1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.]], dtype=float32)>,
 <tf.Variable 'sequential_2/dense_5/bias:0' shape=(64,) dtype=float32, numpy=
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>]

In [2]:
# 自定义层(layer)

# 自定义了一个将输入减掉均值后输出的层，并将层的计算定义在了call函数里。这个层里不含模型参数
class CenteredLayer(tf.keras.layers.Layer):
    """Parameter-free layer that subtracts the mean of its input.

    The mean is taken over all elements of ``inputs`` (no axis is given), so a
    single scalar is subtracted from every entry.

    NOTE(review): ``tf.reduce_mean`` keeps the input dtype, so for integer inputs
    the mean is computed in integer arithmetic -- confirm this is acceptable for
    integer data whose mean is not a whole number.

    Args:
        **kwargs: Standard ``tf.keras.layers.Layer`` keyword arguments (for example
            ``name`` or ``dtype``), forwarded unchanged to the base class.
    """

    def __init__(self, **kwargs):
        # Forwarding kwargs lets callers name the layer and lets Keras rebuild it
        # from its config, which passes the base-layer settings as keyword arguments.
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return ``inputs`` minus the mean of all its elements."""
        return inputs - tf.reduce_mean(inputs)
    
# Apply the layer to a small integer vector; its mean (3) is subtracted from every entry.
layer = CenteredLayer()
layer(np.array([1,2,3,4,5]))

<tf.Tensor: id=4, shape=(5,), dtype=int32, numpy=array([-2, -1,  0,  1,  2])>

In [4]:
# The custom layer can be used directly when assembling a model.

net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(20),
    CenteredLayer(),
])

X = tf.random.uniform((2, 20))
Y = net(X)
Y

<tf.Tensor: id=42, shape=(2, 20), dtype=float32, numpy=
array([[-0.09796398, -0.9546359 ,  0.8371567 ,  0.8377847 ,  0.13546357,
         0.18864506, -0.2659811 , -0.12369398, -0.56885   ,  0.02776444,
        -0.14654467, -0.24138471, -0.5922776 , -0.67358243, -0.5022928 ,
         0.51680523,  0.07384016,  0.6827046 ,  0.4636204 ,  0.8489546 ],
       [-0.037692  , -0.9103488 ,  1.1209129 ,  0.09312033, -0.47520128,
         0.11904681, -0.36028138,  0.02276197, -0.7714759 ,  0.04662424,
         0.10191447,  0.34162793, -0.3963228 , -0.41576925,  0.3425236 ,
         0.14212367, -0.2709455 , -0.21559405,  0.11220275,  0.9652403 ]],
      dtype=float32)>

In [10]:
# 自定义含模型参数的自定义层。其中的模型参数可以通过训练学出

# 自定义实现全连接层的计算
class myDense(tf.keras.layers.Layer):
    """Hand-written fully connected layer computing ``inputs @ w + b``.

    Args:
        units: Number of output units (the second dimension of ``w``).
        **kwargs: Standard ``tf.keras.layers.Layer`` keyword arguments (for example
            ``name`` or ``dtype``), forwarded unchanged to the base class.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the weights; ``input_shape`` is the shape of the first ``call`` input."""
        # The prints show when build() runs and with which shapes.
        print('units:',self.units)
        print('input_shape:',input_shape)
        self.w = self.add_weight(
            name='w',
            shape=[input_shape[-1], self.units],
            initializer=tf.random_normal_initializer(),
        )
        self.b = self.add_weight(
            name='b',
            shape=[self.units],
            initializer=tf.zeros_initializer(),
        )

    def call(self, inputs):
        """Return the affine transform ``inputs @ w + b``."""
        y_pred = tf.matmul(inputs, self.w) + self.b
        return y_pred

    def get_config(self):
        """Return the layer config including ``units`` so the layer can be re-created."""
        config = super().get_config()
        config.update({'units': self.units})
        return config
    
    
# Create a 4-unit layer and call it on X; build() prints its units and input shape.
dense = myDense(4)
dense(X)
# Display the layer's weights as NumPy arrays.
dense.get_weights()

units: 4
input_shape: (2, 20)


[array([[-0.01107107, -0.06184303,  0.02175089,  0.11343133],
        [ 0.07814262, -0.04085029, -0.00610991,  0.03317473],
        [-0.04373477,  0.0601783 ,  0.03261429,  0.10310387],
        [ 0.04387454,  0.01911958, -0.00698581, -0.023418  ],
        [-0.09526867,  0.03220423,  0.03135252,  0.01348117],
        [ 0.05873218,  0.02445574,  0.05837744,  0.00269256],
        [ 0.01526726, -0.00449585, -0.06728358,  0.02487175],
        [-0.07703592,  0.00366705, -0.00213093,  0.00627941],
        [ 0.07019436, -0.00303624,  0.01894838, -0.04115822],
        [-0.01011225,  0.0024974 ,  0.03161853,  0.01224643],
        [-0.08700826,  0.03375994, -0.02537072, -0.03417142],
        [ 0.0036299 ,  0.08044641,  0.1015109 ,  0.11611362],
        [ 0.05359798,  0.02131139, -0.0219804 , -0.07504892],
        [-0.03139121,  0.03468395,  0.01180844, -0.05017263],
        [ 0.03015341,  0.02195281,  0.01837931,  0.02318553],
        [ 0.02733223,  0.0120684 , -0.0793448 , -0.01146867],
        

In [11]:
# Stack two custom dense layers (8 units, then 1 unit) and run them on X.
net = tf.keras.models.Sequential([
    myDense(8),
    myDense(1),
])

net(X)

units: 8
input_shape: (2, 20)
units: 1
input_shape: (2, 8)


<tf.Tensor: id=207, shape=(2, 1), dtype=float32, numpy=
array([[-0.00804436],
       [-0.01073209]], dtype=float32)>