## 1.导入依赖以及网络结构

In [1]:
# 加载网络结构
%run AI2Flutter.py

2023-04-19 18:41:47.860030: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-19 18:41:48.821623: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-19 18:41:49.246150: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-04-19 18:41:51.382457: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

## 2.实例化网络，并设置模型输入形状

In [13]:
# 超参数
num_layers = 4
d_model = 256
dff = 2048
num_heads = 8
dropout_rate = 0.1
input_vocab = 1000  # 每个数字在[0,999]之间，997表示起点，998表示生成终点，999表示节点之间的分隔符
output_vocab = 1000  # 

# 权重保存位置
save_weight_path = "./model_weight/model_1"
save_path = "./model/model_1"

transformer = Transformer(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    input_vocab=input_vocab,
    output_vocab=output_vocab,
    dropout_rate=dropout_rate)

# 使用996作为填充，故考虑loss时不考虑996
def masked_loss(label, pred):
  print(label)  
  mask = label == 996
  print(mask)
  loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')
  loss = loss_object(label, pred)

  mask = tf.cast(mask, dtype=loss.dtype)
  loss *= mask

  loss = tf.reduce_sum(loss)/tf.reduce_sum(mask)
  return loss

def masked_accuracy(label, pred):
  pred = tf.argmax(pred, axis=2)
  label = tf.cast(label, pred.dtype)
  match = label == pred

  mask = label != 996

  match = match & mask

  match = tf.cast(match, dtype=tf.float32)
  mask = tf.cast(mask, dtype=tf.float32)
  return tf.reduce_sum(match)/tf.reduce_sum(mask)

# 优化器采用Adam，学习率自定义
learning_rate = CustomSchedule(d_model)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98,
                                     epsilon=1e-9)
transformer.compile(
    loss=masked_loss,
    optimizer=optimizer,
    metrics=[masked_accuracy]
)
# 设置模型输入形状
transformer((tf.keras.layers.Input(shape=(None,)),
             tf.keras.layers.Input(shape=(None,))))
# 网络概览
transformer.summary()

Model: "transformer_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_2 (Encoder)         multiple                  12877824  
                                                                 
 decoder_2 (Decoder)         multiple                  21294080  
                                                                 
 dense_50 (Dense)            multiple                  257000    
                                                                 
Total params: 34,428,904
Trainable params: 34,428,904
Non-trainable params: 0
_________________________________________________________________


## 3.加载已经训练的权重，方便继续训练

In [3]:
# 加载已训练权重
transformer.load_weights(save_weight_path)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x139ea2c40>

## 4.加载数据集训练网络

In [10]:
%run AI2Flutter_demo_data.py
# 数据规模
train_seqs_num = 1000
validation_seqs_num = 100
# 生成随机数据集
input_data, output_data, output_label = demo_generate_data(train_seqs_num)
vali_input_data, vali_output_data, vali_output_label = demo_generate_data(validation_seqs_num)

# 训练集
train_input = tf.data.Dataset.from_generator(
    lambda: input_data, 
    output_signature=(
        tf.TensorSpec(shape=(None,), dtype=tf.float32)))
train_output = tf.data.Dataset.from_generator(
    lambda: output_data, 
    output_signature=(
        tf.TensorSpec(shape=(None,), dtype=tf.float32)))
train_label = tf.data.Dataset.from_generator(
    lambda: output_label, 
    output_signature=(
        tf.TensorSpec(shape=(None,), dtype=tf.float32)))
train_dataset = tf.data.Dataset.zip(((train_input, train_output), train_label))
# batch设置
train_dataset = train_dataset.padded_batch(2, padding_values=996.0)

# 验证集
vali_input = tf.data.Dataset.from_generator(
    lambda: vali_input_data, 
    output_signature=(
        tf.TensorSpec(shape=(None,), dtype=tf.float32)))
vali_output = tf.data.Dataset.from_generator(
    lambda: vali_output_data, 
    output_signature=(
        tf.TensorSpec(shape=(None,), dtype=tf.float32)))
vali_label = tf.data.Dataset.from_generator(
    lambda: vali_output_label, 
    output_signature=(
        tf.TensorSpec(shape=(None,), dtype=tf.float32)))
vali_dataset = tf.data.Dataset.zip(((vali_input, vali_output), vali_label))
vali_dataset = vali_dataset.padded_batch(1, padding_values=996.0)

In [14]:
# 训练
transformer.fit(
    x=train_dataset,
    epochs=4,
    validation_data=vali_dataset
)

Epoch 1/4
Tensor("IteratorGetNext:2", shape=(None, None), dtype=float32)
Tensor("masked_loss/NotEqual:0", shape=(None, None), dtype=bool)
Tensor("IteratorGetNext:2", shape=(None, None), dtype=float32)
Tensor("masked_loss/NotEqual:0", shape=(None, None), dtype=bool)
    499/Unknown - 20s 25ms/step - loss: 3.6569 - masked_accuracy: 0.4402Tensor("IteratorGetNext:2", shape=(None, None), dtype=float32)
Tensor("masked_loss/NotEqual:0", shape=(None, None), dtype=bool)
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7fa4982fc2e0>

In [None]:
# num_layers = 2
# d_model = 128
# dff = 512
# num_heads = 8
# dropout_rate = 0.1
# input_node_dim = 24
# target_node_dim = 24
# Model: "transformer_1"
# _________________________________________________________________
#  Layer (type)                Output Shape              Param #   
# =================================================================
#  encoder_1 (Encoder)         multiple                  1322624   
                                                                 
#  decoder_1 (Decoder)         multiple                  2378112   
                                                                 
#  dense_29 (Dense)            multiple                  3096      
                                                                 
# =================================================================
# Total params: 3,703,832
# Trainable params: 3,703,832
# Non-trainable params: 0
# _________________________________________________________________
# 1. 1000(16) 100轮 loss: 1100->255
# 2. 1000(8) 100轮 loss: 281->233
# 3. 1000(4) 100轮 loss: 277->206
# 4. 2000(2) 100轮 loss: 272->244 在12-36个epoch降不下去，像是batch太少
# 5. 1000(8) 100轮 loss: 265->191 再100轮 loss:190->146 再100轮 loss:147->119
# 6. 1000(2) 10轮 loss: 304->239 又被打乱了
# 7. 1000(16) 400轮 loss: 255->102 期间在100附近震荡很多次
# 8. 100(4) 100轮 loss: 373->27
# 9. 100(4) 100轮 loss: 485->36
# 10. 100(4) 100轮 loss: 458->45
# 11. 500(8) 100轮 loss: 438->117

# 注意，transformer由于有填充，损失函数是不考虑这部分的。
# 其次，序列通过均方来作为损失函数，进行回归是不是有问题，很难收敛，
# 考虑到注意力机制是观察另一个向量，是不是应该用分类，而不是向量的回归，

# 此外，我们要AI做到一是布局方式，二是节点裁剪，这里是不是不需要让AI映射每一个值？

## 5.使用网络预测

In [99]:
def d2c(schema, flutter, max_length=100):
    '''
    schema: 设计稿schema, 为一个一维向量
    flutter: 实际Flutter节点输出，为一个一维向量，用于测试翻译的结果
    
    特殊数字：996填充，997开始，998结束，999节点分割符
    '''
    # 组织batch为1结构，作为网络输入输出
    input1 = tf.constant([schema])
    predict = [[997]]
    for i in range(max_length):
        p = transformer((input1, tf.constant(predict)), training=False)
        p = p[:, -1:, :]
        p_id = tf.argmax(p, axis=-1)[0].numpy().tolist()[0]
        predict[0].append(p_id)
        if (p_id == 998):
            break
    predict = predict[0][1:]
    print("schema: ", schema)
    print("flutter length: ", len(flutter), "value: ", flutter)
    print("predict length: ", len(predict), "value: ", predict)
    print("distance: ", [flutter[i] - predict[i] for i in range(min(len(flutter), len(predict)))])

schema:  [1, 0, 0, 41, 99, 29, 52, 86, 86, 52, 24, 37]
flutter length:  11 value:  [2, 1, 0, 29, 52, 86, 86, 52, 24, 37, 998]
predict length:  11 value:  [2, 1, 0, 29, 52, 52, 52, 52, 24, 37, 998]
distance:  [0, 0, 0, 0, 0, 34, 34, 0, 0, 0, 0]


In [None]:
# 随机生成一个进行预测
input1, input2, output = demo_generate_data(1)
d2c(input1[0], output[0])

In [96]:
schema = [1, 56, 71, 250, 787, 26, 152, 118, 86, 120, 56, 78]
flutter = [1, 1, 0, 56, 71, 0, 0, 999, 2, 2, 1, 26, 152, 118, 86, 120, 56, 78, 998]
d2c(schema, flutter)

schema:  [1, 56, 71, 250, 787, 26, 152, 118, 86, 120, 56, 78]
flutter length:  19 value:  [1, 1, 0, 56, 71, 0, 0, 999, 2, 2, 1, 26, 152, 118, 86, 120, 56, 78, 998]
predict length:  19 value:  [1, 1, 0, 56, 71, 0, 0, 999, 2, 2, 1, 26, 152, 118, 86, 120, 56, 78, 998]
distance:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


## 6.保存模型的权重，方便下一次训练

In [15]:
#保存训练权重
transformer.save_weights(save_weight_path)

## 7.保存整个模型，方便迁移到其他地方

In [16]:
# 直接加载模型
# transformer = tf.saved_model.load("model2")
# 保存模型
tf.saved_model.save(transformer, save_path)



INFO:tensorflow:Assets written to: ./model/model_1/assets


INFO:tensorflow:Assets written to: ./model/model_1/assets
