## 第一种方法直接用keras的fit方法，以fashion mnist为例

In [44]:
import tensorflow as tf
import time
import datetime
import matplotlib.pyplot as plt
from functools import partial
import numpy as np
import os

### 配置超参数

In [41]:
# ---- Training hyper-parameters ----
batch_size=64
epochs=10
regularizer=1e-3          # coefficient handed to the L2 regularizer
total_train_samples=60000 # used to derive the number of steps per epoch
total_test_samples=10000
lr_decay_epochs=1         # the learning rate decays once every this many epochs

# ---- Output locations ----
# Root folder for the models and every other artifact of the run.
output_folder="./model_output"
os.makedirs(output_folder,exist_ok=True)

save_format="hdf5" # or "saved_model"
if save_format=="hdf5":
    save_dir=os.path.join(output_folder,"hdf5_models")
    ckpt_name="ckpt_{epoch:02d}_{val_accuracy:.3f}.hdf5"
elif save_format=="saved_model":
    save_dir=os.path.join(output_folder,"saved_models")
    ckpt_name="ckpt_{epoch:02d}_{val_accuracy:.3f}.ckpt"
else:
    # Fail here rather than with a NameError on save_path further down.
    raise ValueError("Unsupported save_format: {!r}".format(save_format))
os.makedirs(save_dir,exist_ok=True)
# The checkpoint template lives INSIDE the per-format sub-folder created above
# (previously it was joined onto output_folder, leaving the sub-folder unused).
save_path=os.path.join(save_dir,ckpt_name)

### 选择指定显卡及自动调用显存

In [11]:
# Ask TensorFlow for every GPU it can see and report the list.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("All the available GPUs:\n",physical_devices)

if len(physical_devices) > 0:
    # Work with the first GPU only.
    gpu = physical_devices[0]
    # Let the memory allocation grow on demand ...
    tf.config.experimental.set_memory_growth(gpu, True)
    # ... and make this GPU the only visible one.
    tf.config.experimental.set_visible_devices(gpu, 'GPU')

All the available GPUs:
 [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


### 准备数据

In [4]:
# Load the Fashion-MNIST train/test arrays through the Keras dataset helper.
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_x, train_y), (test_x, test_y) = fashion_mnist.load_data()

# Append a trailing channel axis and divide the pixel values by 255.0.
train_x = train_x[..., np.newaxis] / 255.0
test_x = test_x[..., np.newaxis] / 255.0

### 使用tf.data来准备训练集和测试集

In [5]:
# Training pipeline: slice -> shuffle -> batch -> prefetch -> repeat forever.
train_ds = tf.data.Dataset.from_tensor_slices((train_x, train_y))
train_ds = train_ds.shuffle(buffer_size=batch_size*10)
train_ds = train_ds.batch(batch_size)
train_ds = train_ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
train_ds = train_ds.repeat()

# Test pipeline: same steps without shuffling.
test_ds = tf.data.Dataset.from_tensor_slices((test_x, test_y))
test_ds = test_ds.batch(batch_size)
test_ds = test_ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
test_ds = test_ds.repeat()

### 准备模型定义

In [6]:
# L2 regularizer shared by the layer factories below.
l2 = tf.keras.regularizers.l2(regularizer)
# He-normal initializer.
# NOTE(review): `ini` is created here but never passed to any layer factory
# below, so the layers keep their default initializers - confirm whether it
# was meant to be supplied as kernel_initializer.
ini = tf.keras.initializers.he_normal()

# Keyword arguments common to the convolution and dense factories.
_common_layer_kwargs = dict(activation='relu', kernel_regularizer=l2, bias_regularizer=l2)

# Pre-configured layer constructors; call them with the remaining arguments.
conv2d = partial(tf.keras.layers.Conv2D, padding='same', **_common_layer_kwargs)
fc = partial(tf.keras.layers.Dense, **_common_layer_kwargs)

# Short aliases for layers that need no preset arguments.
maxpool = tf.keras.layers.MaxPooling2D
dropout = tf.keras.layers.Dropout


### 开始定义模型,用functional方法

In [9]:
# Functional-API model: two conv + max-pool stages, then two dense layers.
x_input = tf.keras.layers.Input(shape=(28, 28, 1))

# Stage 1: 128 filters of size 5x5, then 2x2 max-pooling.
x = conv2d(filters=128, kernel_size=(5, 5))(x_input)
x = maxpool(pool_size=(2, 2))(x)

# Stage 2: 256 filters of size 5x5, then 2x2 max-pooling.
x = conv2d(filters=256, kernel_size=(5, 5))(x)
x = maxpool(pool_size=(2, 2))(x)

# Classifier head: flatten, a 128-unit hidden layer, and a 10-unit output
# layer created with activation=None.
x = tf.keras.layers.Flatten()(x)
x = fc(units=128)(x)
x_output = fc(units=10, activation=None)(x)

model = tf.keras.models.Model(inputs=x_input, outputs=x_output)


### 打印模型结构

In [14]:
print("The model architure:\n")
# model.summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line after the summary.
model.summary()

The model architure:

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 28, 28, 128)       3328      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 14, 14, 128)       0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 14, 14, 256)       819456    
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 7, 7, 256)         0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 12544)             0         
_________________________________________________________________
dense_3 (Dense)              (None, 128

### 定义优化算法和损失函数

In [15]:
# Number of optimizer steps that make up one pass over the training set.
train_steps_per_epoch=total_train_samples//batch_size
initial_learning_rate=0.01

# Optimizer: SGD with momentum, started at a constant learning rate.
optimizer = tf.keras.optimizers.SGD(learning_rate=initial_learning_rate,momentum=0.95)

# Loss: the model's last layer has no activation (it outputs logits), hence
# from_logits=True.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Metrics.
# * tf.keras.metrics.Accuracy() tests y_pred == y_true element-wise, which is
#   meaningless for logits vs. integer labels. SparseCategoricalAccuracy takes
#   the argmax of the predictions instead. It is named 'accuracy' so that the
#   'val_accuracy' key used by the checkpoint filename and the callbacks exists.
# * The cross-entropy metric must also be told that it receives logits.
#   Subtracting it from the reported loss gives the regularization term.
metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy'),
         tf.keras.metrics.SparseCategoricalCrossentropy(from_logits=True)]



### 编译模型

In [16]:
# Attach the optimizer, the loss and the metric objects to the model.
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=metrics,
)

### 定义callbacks

In [None]:
# Model checkpoints. HDF5 is tried first because it is a single file and easy
# to handle; such a model can be loaded and re-saved as a SavedModel later.
# save_path is the filename template defined in the configuration cell.
# The frequency argument of ModelCheckpoint is `save_freq` (there is no
# `save_frequency`). 'epoch' saves once per epoch, which is also when the
# `val_accuracy` value needed by the filename template is available.
ckpt = tf.keras.callbacks.ModelCheckpoint(save_path,monitor='val_accuracy',verbose=1,
                                         save_best_only=False,save_weights_only=False,
                                         save_freq='epoch')
# Stop training when validation accuracy has not improved by at least
# min_delta (1e-5, i.e. 0.001%) for 5 consecutive epochs.
earlystop=tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',min_delta = 0.00001,patience=5)



#### 自定义学习率衰减并打印出来

In [43]:
class LearningRateExponentialDecay(tf.keras.optimizers.schedules.ExponentialDecay):
    """ExponentialDecay schedule extended with a plotting helper.

    The constructor takes the same arguments as the parent class and forwards
    all of them unchanged.
    """
    def __init__(self,initial_learning_rate,decay_steps,decay_rate,staircase=False,name=None):
        # Forward the caller's staircase/name; they used to be replaced by
        # hard-coded False/None, silently disabling staircase decay.
        super().__init__(initial_learning_rate,decay_steps,decay_rate,staircase=staircase,name=name)

    def plot(self,epochs,title="Learning Rate Schedule",steps_per_epoch=1):
        """Plot the learning rate against the given epoch indices.

        Args:
            epochs: iterable of epoch indices, used as the x axis.
            title: title of the figure.
            steps_per_epoch: number of optimizer steps per epoch. Each epoch
                index is multiplied by it before the schedule is evaluated.
                The default of 1 evaluates the schedule at the raw values in
                `epochs`.
        """
        # float() assumes eager execution, where the schedule returns a
        # concrete scalar tensor.
        lrs = [float(self(epoch*steps_per_epoch)) for epoch in epochs]
        plt.figure()
        plt.plot(epochs,lrs)
        plt.title(title)
        # The axis-label functions are xlabel/ylabel; the former
        # "xlable"/"ylable" spelling raised AttributeError.
        plt.xlabel("Epoch #")
        plt.ylabel("Learning Rate")
    
# Staircase exponential decay:
# initial_learning_rate * 0.96 ** floor(step / decay_steps)
lr_schedule = LearningRateExponentialDecay(initial_learning_rate,
                                           decay_steps=lr_decay_epochs*train_steps_per_epoch,
                                           decay_rate=0.96,
                                           staircase=True)
# LearningRateScheduler calls its function with the EPOCH index, while the
# schedule object expects an optimizer STEP. Passing the schedule directly
# would evaluate it at step = epoch, so the rate would practically never decay.
# The epoch is converted to a step count first; the optional second argument
# accepts the current learning rate that Keras may pass along.
lr = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch, current_lr=None: float(lr_schedule(epoch*train_steps_per_epoch)))
class PrintLR(tf.keras.callbacks.Callback):
    """Callback that prints the optimizer's learning rate after every epoch."""
    def on_epoch_end(self, epoch, logs=None):
        # Read the rate from self.model (the model Keras attaches to the
        # callback) rather than from the notebook-global `model`, so the
        # callback reports on whichever model is actually being trained.
        current_lr = tf.keras.backend.get_value(self.model.optimizer.lr)
        print('\nLearning rate for epoch {} is {}'.format(epoch + 1,
                                                      current_lr))
print_lr = PrintLR()

In [46]:
# TensorBoard logging through the stock Keras callback (what this callback
# records is limited). Each run gets its own time-stamped log folder.
_run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = os.path.join(output_folder, 'logs_{}'.format(_run_stamp))
tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

# class Tensorboard(tf.keras.callbacks.TensorBoard):
#     def on_epoch_end(self,epoch,logs=None)
#         super().on_epoch_end(epoch,logs=None)
        

In [30]:

# Scratch cell: references datetime.datetime.now without calling it, so the
# cell's value is the method object itself, not a timestamp.
datetime.datetime.now

In [31]:
# Scratch cell: show the help text of the custom schedule class defined above.
help(LearningRateExponentialDecay)

Help on class LearningRateExponentialDecay in module __main__:

class LearningRateExponentialDecay(tensorflow.python.keras.optimizer_v2.learning_rate_schedule.ExponentialDecay)
 |  A LearningRateSchedule that uses an exponential decay schedule.
 |  
 |  Method resolution order:
 |      LearningRateExponentialDecay
 |      tensorflow.python.keras.optimizer_v2.learning_rate_schedule.ExponentialDecay
 |      tensorflow.python.keras.optimizer_v2.learning_rate_schedule.LearningRateSchedule
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  plot(self, epochs, title='Learning Rate Schedule')
 |  
 |  ----------------------------------------------------------------------
 |  Methods inherited from tensorflow.python.keras.optimizer_v2.learning_rate_schedule.ExponentialDecay:
 |  
 |  __call__(self, step)
 |      Call self as a function.
 |  
 |  __init__(self, initial_learning_rate, decay_steps, decay_rate, staircase=False, name=None)
 |      Applies exponential decay to the learni

In [32]:
# Scratch cell: show the help text of the parent ExponentialDecay schedule.
help(tf.keras.optimizers.schedules.ExponentialDecay)

Help on class ExponentialDecay in module tensorflow.python.keras.optimizer_v2.learning_rate_schedule:

class ExponentialDecay(LearningRateSchedule)
 |  A LearningRateSchedule that uses an exponential decay schedule.
 |  
 |  Method resolution order:
 |      ExponentialDecay
 |      LearningRateSchedule
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __call__(self, step)
 |      Call self as a function.
 |  
 |  __init__(self, initial_learning_rate, decay_steps, decay_rate, staircase=False, name=None)
 |      Applies exponential decay to the learning rate.
 |      
 |      When training a model, it is often recommended to lower the learning rate as
 |      the training progresses. This schedule applies an exponential decay function
 |      to an optimizer step, given a provided initial learning rate.
 |      
 |      The schedule a 1-arg callable that produces a decayed learning
 |      rate when passed the current optimizer step. This can be useful for changing
 |      

In [39]:
# Scratch cell: quick check of tf.math.pow, raising 2.1 to the power 3.
tf.math.pow(2.1,3)

<tf.Tensor: id=605, shape=(), dtype=float32, numpy=9.260999>