## 1.导入依赖以及网络结构

In [9]:
# 加载网络结构
%run AI2Flutter.py

## 2.实例化网络，并设置模型输入形状

In [10]:
# --- Hyperparameters -------------------------------------------------------
num_layers = 2
d_model = 128
num_heads = 8
dff = 512
dropout_rate = 0.1
input_node_dim = 24   # the demo uses 24-dimensional vectors throughout
target_node_dim = 24

# --- Where results are written ---------------------------------------------
save_weight_path = "./model_weight/model_1"  # weights-only checkpoint prefix
save_path = "./model/model_1"                # full-model export directory

# --- Model -----------------------------------------------------------------
# Collect the constructor arguments in one place, then instantiate the network
# (Transformer comes from the script loaded by the first cell).
model_config = dict(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    input_node_dim=input_node_dim,
    target_node_dim=target_node_dim,
    dropout_rate=dropout_rate,
)
transformer = Transformer(**model_config)

# --- Optimisation ----------------------------------------------------------
# Adam driven by the custom learning-rate schedule; mean squared error as loss.
learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(
    learning_rate=learning_rate,
    beta_1=0.9,
    beta_2=0.98,
    epsilon=1e-9,
)
transformer.compile(optimizer=optimizer, loss='mean_squared_error')

# --- Fix the input shapes --------------------------------------------------
# Call the model once on symbolic inputs so its variables are created; the
# sequence-length axis is left as None (variable length).
source_placeholder = tf.keras.layers.Input(shape=(None, input_node_dim))
target_placeholder = tf.keras.layers.Input(shape=(None, target_node_dim))
transformer((source_placeholder, target_placeholder))

# Network overview
transformer.summary()

Model: "transformer_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_1 (Encoder)         multiple                  1322624   
                                                                 
 decoder_1 (Decoder)         multiple                  2378112   
                                                                 
 dense_29 (Dense)            multiple                  3096      
                                                                 
Total params: 3,703,832
Trainable params: 3,703,832
Non-trainable params: 0
_________________________________________________________________


## 3.加载已经训练的权重，方便继续训练

In [3]:
# Restore previously trained weights from save_weight_path so that training can
# continue from them.
# NOTE(review): presumably this cell must be skipped on a first run, when no
# checkpoint has been written to save_weight_path yet — confirm.
transformer.load_weights(save_weight_path)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x139ea2c40>

## 4.加载数据集训练网络

In [36]:
%run AI2Flutter_demo_data.py
# 数据规模
train_seqs_num = 500
validation_seqs_num = 30
# 生成随机数据集
input_data, output_data, output_label = demo_generate_data(train_seqs_num)
vali_input_data, vali_output_data, vali_output_label = demo_generate_data(validation_seqs_num)

# 训练集
train_input = tf.data.Dataset.from_generator(
    lambda: input_data, 
    output_signature=(
        tf.TensorSpec(shape=(None, input_node_dim), dtype=tf.float32)))
train_output = tf.data.Dataset.from_generator(
    lambda: output_data, 
    output_signature=(
        tf.TensorSpec(shape=(None, target_node_dim), dtype=tf.float32)))
train_label = tf.data.Dataset.from_generator(
    lambda: output_label, 
    output_signature=(
        tf.TensorSpec(shape=(None, target_node_dim), dtype=tf.float32)))
train_dataset = tf.data.Dataset.zip(((train_input, train_output), train_label))
# batch设置
train_dataset = train_dataset.padded_batch(8)

# 验证集
vali_input = tf.data.Dataset.from_generator(
    lambda: vali_input_data, 
    output_signature=(
        tf.TensorSpec(shape=(None, input_node_dim), dtype=tf.float32)))
vali_output = tf.data.Dataset.from_generator(
    lambda: vali_output_data, 
    output_signature=(
        tf.TensorSpec(shape=(None, target_node_dim), dtype=tf.float32)))
vali_label = tf.data.Dataset.from_generator(
    lambda: vali_output_label, 
    output_signature=(
        tf.TensorSpec(shape=(None, target_node_dim), dtype=tf.float32)))
vali_dataset = tf.data.Dataset.zip(((vali_input, vali_output), vali_label))
vali_dataset = vali_dataset.padded_batch(1)

In [37]:
# Train. The held-out validation set built in the previous cell is evaluated
# after every epoch, so the reported val_loss is measured on sequences the
# network was not trained on.
# The returned History object is kept so the loss curves can be inspected later
# (history.history).
history = transformer.fit(
    x=train_dataset,
    validation_data=vali_dataset,
    epochs=100,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7f67a4ba9220>

In [None]:
# Hyperparameters and model summary that accompany the training log below:
# num_layers = 2
# d_model = 128
# dff = 512
# num_heads = 8
# dropout_rate = 0.1
# input_node_dim = 24
# target_node_dim = 24
# Model: "transformer_1"
# _________________________________________________________________
#  Layer (type)                Output Shape              Param #   
# =================================================================
#  encoder_1 (Encoder)         multiple                  1322624   

#  decoder_1 (Decoder)         multiple                  2378112   

#  dense_29 (Dense)            multiple                  3096      

# =================================================================
# Total params: 3,703,832
# Trainable params: 3,703,832
# Non-trainable params: 0
# _________________________________________________________________
# Training log. Entry format appears to be
# <training sequences>(<batch size>), <epochs>, loss: start -> end — TODO confirm.
# 1. 1000(16), 100 epochs, loss: 1100 -> 255
# 2. 1000(8), 100 epochs, loss: 281 -> 233
# 3. 1000(4), 100 epochs, loss: 277 -> 206
# 4. 2000(2), 100 epochs, loss: 272 -> 244; the loss would not go down during epochs 12-36, the batch looks too small
# 5. 1000(8), 100 epochs, loss: 265 -> 191; another 100 epochs, loss: 190 -> 146; another 100 epochs, loss: 147 -> 119
# 6. 1000(2), 10 epochs, loss: 304 -> 239; got disrupted again
# 7. 1000(16), 400 epochs, loss: 255 -> 102; oscillated around 100 many times along the way
# 8. 100(4), 100 epochs, loss: 373 -> 27
# 9. 100(4), 100 epochs, loss: 485 -> 36
# 10. 100(4), 100 epochs, loss: 458 -> 45
# 11. 500(8), 100 epochs, loss: 438 -> 117

# Note: because the transformer works on padded sequences, the loss function is
# not supposed to take the padded part into account.
# NOTE(review): compile() in this notebook uses plain 'mean_squared_error';
# whether padded positions are actually excluded is not visible here — confirm.
# Secondly, is it a problem to regress sequences with mean squared error as the
# loss? Should this be classification instead?
# Given that the attention mechanism looks at another vector, should this be
# classification rather than regression of vectors?
# This is far harder than regressing a single value; I even suspect it is harder
# than NLP. The main issue is that it simply does not converge.
# Besides, what we need from the AI is (1) the layout mode and (2) node pruning;
# perhaps there is no need to have the AI map every single value here?

## 5.使用网络预测

In [62]:
# # Prediction. With the earlier all-ones samples the network seemed to have
# # learned to map any number to 1.
# input1 = tf.zeros((1,5,24))
# input2 = tf.constant([[[
#     12526,-52,-97877773352,45,5,6,7,8,9,10,-22099995,-12,13,999,15,167866687,17,18,19,10,0,0,0,0
# ]]])
# # input2 = tf.ones((1, 1, 24))
# # input2 = tf.zeros((1, 1, 24))
# input2 = tf.cast(input2, tf.float32)
# re = transformer((input1, input2), training=False)
# print(input2)
# print(re)
# print(tf.reduce_sum(tf.abs(re - tf.ones((1, 1, 24)))))

# Draw one fresh demo sample; input2 is not used below.
input1, input2, output = demo_generate_data(1)

# Single start step for the second model input: a (1, 1, target_node_dim) block
# filled with -1. Its width is taken from target_node_dim, the size the model
# was built with, and it is created as float32 to match the training datasets.
start = [[[-1.0] * target_node_dim]]
p1 = transformer(
    (tf.constant(input1), tf.constant(start, dtype=tf.float32)),
    training=False)

# Rounded prediction next to the first step of the expected output.
print(tf.round(p1))
print(output[0][0])
print("***************")

# Element-wise error against that expected step, then the sum of squared errors.
d = np.array(p1 - output[0][0])
print(d)
print(np.sum(d**2))

tf.Tensor(
[[[ 0.  0. -1.  8. 33. -5. 28. -6.  1.  1. -1. -1. -1. -1. -1. -1. -1.
   -1. -1. -1. -1. -1. -1. -1.]]], shape=(1, 1, 24), dtype=float32)
[2, 2, 1, 19, 255, 0, 255, 0, 24, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
***************
[[[  -2.230118     -2.2769947    -1.7701182   -10.594133   -222.26733
     -4.714891   -227.40027      -6.48445     -22.81416     -18.860962
     -0.79332674   -0.7928952    -0.7933324    -0.7934217    -0.7935638
     -0.79332376   -0.792983     -0.79374325   -0.7929125    -0.7930898
     -0.7934303    -0.79340845   -0.7934934    -0.7929172 ]]]
102188.484


## 6.保存模型的权重，方便下一次训练

In [15]:
# Save the trained weights to save_weight_path so the next training session can
# start from them.
transformer.save_weights(save_weight_path)

## 7.保存整个模型，方便迁移到其他地方

In [16]:
# To load an exported model directly instead:
# transformer = tf.saved_model.load("model2")
# Export the whole model to save_path.
tf.saved_model.save(transformer, save_path)



INFO:tensorflow:Assets written to: ./model/model_1/assets


INFO:tensorflow:Assets written to: ./model/model_1/assets
