In [1]:
import tensorflow as tf
import numpy as np

## EMA(Exponential Moving Average)

In [2]:
class EMA:
    def __init__(self, model, decay):
        self.model = model
        self.decay = decay
        self.shadow = {}
        self.backup = {}
        self.register()

    # 注册所有需要跟踪的变量
    def register(self):
        for param in self.model.variables:
            if param.trainable:
                self.shadow[param.name] = param.value()

    # 每次变量的值改变后更新影子变量的值
    def update(self):
        for param in self.model.variables:
            if param.trainable:
                assert param.name in self.shadow
                new_average = (1.0 - self.decay) * param.value() + self.decay * self.shadow[param.name]
                self.shadow[param.name] = new_average

    # 将模型参数变成影子值，backup是真实值的备份
    def apply_shadow(self):
        for param in self.model.variables:
            if param.trainable:
                assert param.name in self.shadow
                self.backup[param.name] = param.value()
                param.assign(self.shadow[param.name])
                

    # 将模型的参数变回真实值
    def restore(self):
        for param in self.model.variables:
            if param.trainable:
                assert param.name in self.backup
                param.assign(self.backup[param.name])
        self.backup = {}


## 数据集

In [54]:
np.random.seed(1234) #设置随机种子为1234
dataset_train = np.random.rand(10,7)
print(dataset_train)
labels = [1,1,0,0,1,1,1,0,0,1]

[[0.19151945 0.62210877 0.43772774 0.78535858 0.77997581 0.27259261
  0.27646426]
 [0.80187218 0.95813935 0.87593263 0.35781727 0.50099513 0.68346294
  0.71270203]
 [0.37025075 0.56119619 0.50308317 0.01376845 0.77282662 0.88264119
  0.36488598]
 [0.61539618 0.07538124 0.36882401 0.9331401  0.65137814 0.39720258
  0.78873014]
 [0.31683612 0.56809865 0.86912739 0.43617342 0.80214764 0.14376682
  0.70426097]
 [0.70458131 0.21879211 0.92486763 0.44214076 0.90931596 0.05980922
  0.18428708]
 [0.04735528 0.67488094 0.59462478 0.53331016 0.04332406 0.56143308
  0.32966845]
 [0.50296683 0.11189432 0.60719371 0.56594464 0.00676406 0.61744171
  0.91212289]
 [0.79052413 0.99208147 0.95880176 0.79196414 0.28525096 0.62491671
  0.4780938 ]
 [0.19567518 0.38231745 0.05387369 0.45164841 0.98200474 0.1239427
  0.1193809 ]]


## 模型定义与初始化

In [4]:
class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel,self).__init__()
        self.dense_1 = tf.keras.layers.Dense(7,activation=tf.nn.relu)
        self.dense_2 = tf.keras.layers.Dense(2,activation=tf.nn.softmax)
    
    def build(self,inputs):
        pass
    
    def call(self,inputs):
        x = self.dense_1(inputs)
        output = self.dense_2(x)
        return output

In [5]:
model = MyModel()
model(dataset_train)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Tensor: shape=(10, 2), dtype=float32, numpy=
array([[0.35131532, 0.6486846 ],
       [0.22811796, 0.77188206],
       [0.22424309, 0.77575696],
       [0.38117865, 0.6188214 ],
       [0.33491343, 0.66508657],
       [0.39518434, 0.60481566],
       [0.4021208 , 0.59787923],
       [0.4709828 , 0.5290172 ],
       [0.3412144 , 0.6587856 ],
       [0.30186206, 0.69813794]], dtype=float32)>

## 训练

In [6]:
# global_step变量记录迭代的次数，可以用于动态控制衰减率
global_step = tf.Variable(0, trainable=False)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, epsilon=1e-8)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

In [7]:
# 定义滑动平均的类。初始化时给定了衰减率(0.999)和控制衰减率的变量step，
# ema = tf.train.ExponentialMovingAverage(0.9, global_step)
ema = EMA(model, 0.999)
ema.register()

In [8]:
for num_epoch in range(100):
    global_step = tf.add(global_step,1)
    with tf.GradientTape() as tape:
        predictions = model(dataset_train)
        loss = loss_object(labels,predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        # 更新参数的 EMA 值
        ema.update()

## 验证EMA

In [49]:
# 对比模型参数和相应的影子参数值
tf.equal(model.variables[0], ema.shadow[model.variables[0].name],all)

<tf.Tensor: shape=(7, 7), dtype=bool, numpy=
array([[False, False, False, False, False,  True, False],
       [False, False, False, False, False,  True, False],
       [False, False, False, False, False,  True, False],
       [False, False, False, False, False,  True, False],
       [False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False]])>

In [53]:
# 将模型参数值变成影子值
ema.apply_shadow()
tf.equal(model.variables[0], ema.shadow[model.variables[0].name],all)

<tf.Tensor: shape=(7, 7), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True]])>

In [52]:
# 将模型参数值变回真实值
ema.restore()
tf.equal(model.variables[0], ema.shadow[model.variables[0].name],all)

<tf.Tensor: shape=(7, 7), dtype=bool, numpy=
array([[False, False, False, False, False,  True, False],
       [False, False, False, False, False,  True, False],
       [False, False, False, False, False,  True, False],
       [False, False, False, False, False,  True, False],
       [False, False, False, False, False, False, False],
       [False, False, False, False, False, False, False],
       [False, False, False, False, False,  True, False]])>