In [None]:
!nvidia-smi # 命令查看 GPU 状态

print("GPU 是否可用：", tf.test.is_gpu_available())

Thu Apr 24 10:00:59 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   37C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


GPU 是否可用： True


In [None]:
import tensorflow as tf
import numpy as np
import pickle
from tensorflow.keras.layers import Embedding, MultiHeadAttention, Dense, Input, Dropout, LayerNormalization

In [None]:
def get_angles(pos, k, d):
    """
    Computes the raw (pre-sin/cos) positional-encoding angles.

    Arguments:
    pos -- position index (scalar or array)
    k -- column index (scalar or array); columns 2i and 2i+1 share the same i
    d -- divisor used in the exponent (positional_encoding passes the encoding width)

    Returns:
    pos / 10000 ** (2 * (k // 2) / d), broadcast over pos and k
    """
    pair_index = k // 2
    exponent = (2 * pair_index / d)
    wavelength = np.power(10000, exponent)
    return pos / wavelength
def positional_encoding(positions, d):
    """
    Builds the sinusoidal positional-encoding table.

    Arguments:
    positions -- number of positions (rows) to encode
    d -- encoding width (columns)

    Returns:
    float32 tensor of shape (1, positions, d) with sine values in the even
    columns and cosine values in the odd columns
    """
    row_idx = np.arange(positions)[:, np.newaxis]  # (positions, 1)
    col_idx = np.arange(d)[np.newaxis, :]          # (1, d)
    table = get_angles(row_idx, col_idx, d)
    # Overwrite the raw angles in place: sin on even columns, cos on odd columns.
    table[:, 0::2] = np.sin(table[:, 0::2])
    table[:, 1::2] = np.cos(table[:, 1::2])
    # Add a leading axis of size 1 so the table broadcasts over the batch.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)
def create_padding_mask_nmt(decoder_token_ids, pad_token_id=36):
    """
    Builds an attention mask that hides padding tokens.

    Arguments:
    decoder_token_ids -- integer token ids; the last axis is the sequence axis.
                         Despite the name, this notebook calls it on the encoder
                         (source) ids.
    pad_token_id -- id treated as padding; defaults to 36, the '<pad>' index of
                    human_vocab in this notebook

    Returns:
    float32 tensor with two singleton axes inserted before the last axis,
    holding 1.0 for real tokens and 0.0 for padding
    """
    is_real_token = tf.cast(tf.math.not_equal(decoder_token_ids, pad_token_id), tf.float32)
    # The two singleton axes let the mask broadcast over heads and query positions.
    return is_real_token[:, tf.newaxis, tf.newaxis, :]
def create_look_ahead_mask(sequence_length):
    """
    Builds the causal (look-ahead) mask for decoder self-attention.

    Arguments:
    sequence_length -- number of decoder positions

    Returns:
    tensor of shape (1, sequence_length, sequence_length) with ones on and
    below the diagonal and zeros above it
    """
    all_ones = tf.ones((1, sequence_length, sequence_length))
    # band_part(x, -1, 0) keeps the whole lower triangle and zeroes the upper one.
    return tf.linalg.band_part(all_ones, -1, 0)
def FullyConnected(embedding_dim, fully_connected_dim):
    """
    Builds the position-wise feed-forward network used inside each block.

    Arguments:
    embedding_dim -- output width of the second Dense layer
    fully_connected_dim -- width of the hidden ReLU Dense layer

    Returns:
    tf.keras.Sequential made of Dense(fully_connected_dim, relu) followed by
    Dense(embedding_dim)
    """
    hidden = tf.keras.layers.Dense(fully_connected_dim, activation='relu')
    projection = tf.keras.layers.Dense(embedding_dim)
    return tf.keras.Sequential([hidden, projection])

In [None]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open('/content/drive/MyDrive/Colab Notebooks/variables.pkl', 'rb') as f:
    data = pickle.load(f)

# Unpack the preprocessed objects stored in the pickle into notebook globals.
(dataset, human_vocab, machine_vocab, inv_machine_vocab,
 X, Y, Xoh, Yoh) = (
    data[key]
    for key in ('dataset', 'human_vocab', 'machine_vocab', 'inv_machine_vocab',
                'X', 'Y', 'Xoh', 'Yoh')
)

In [None]:
# Show one example at every preprocessing stage: raw strings, index
# sequences and one-hot matrices.
index = 0
print(f"Source date: {dataset[index][0]}")
print(f"Target date: {dataset[index][1]}")
print(f"Source after preprocessing (indices): {X[index]}")
print(f"Target after preprocessing (indices): {Y[index]}")
print(f"Source after preprocessing (one-hot): {Xoh[index]}")
print(f"Target after preprocessing (one-hot): {Yoh[index]}")

Source date: 9 may 1998
Target date: 1998-05-09
Source after preprocessing (indices): [12  0 24 13 34  0  4 12 12 11 36 36 36 36 36 36 36 36 36 36 36 36 36 36
 36 36 36 36 36 36]
Target after preprocessing (indices): [ 2 10 10  9  0  1  6  0  1 10]
Source after preprocessing (one-hot): [[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 1.]]
Target after preprocessing (one-hot): [[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]


In [None]:
class TransformerEncoderLayer(tf.keras.layers.Layer):
    """
    One encoder block: multi-head self-attention followed by a feed-forward
    network. Each sub-layer is followed by dropout, a residual connection and
    layer normalisation.
    """

    def __init__(self, embedding_dim, num_heads, ffn_dim, dropout_rate=0.1, layernorm_eps=1e-6):
        super().__init__()
        # Every head works on an equal share of the embedding width.
        per_head_dim = embedding_dim // num_heads
        self.mha = MultiHeadAttention(
            num_heads=num_heads, key_dim=per_head_dim, dropout=dropout_rate)
        self.layernorm1 = LayerNormalization(epsilon=layernorm_eps)
        self.layernorm2 = LayerNormalization(epsilon=layernorm_eps)
        self.ffn = FullyConnected(embedding_dim, fully_connected_dim=ffn_dim)
        self.dropout1 = Dropout(dropout_rate)
        self.dropout2 = Dropout(dropout_rate)

    def call(self, x, enc_padding_mask, training):
        """
        Arguments:
        x -- input sequence representation (used as query, value and key)
        enc_padding_mask -- attention mask passed to the self-attention layer
        training -- whether dropout is active

        Returns:
        tensor produced by the second layer normalisation
        """
        # Sub-layer 1: self-attention -> dropout -> residual -> LayerNorm.
        attended = self.mha(x, x, x, attention_mask=enc_padding_mask, training=training)
        attended = self.dropout1(attended, training=training)
        after_attention = self.layernorm1(x + attended)

        # Sub-layer 2: feed-forward -> dropout -> residual -> LayerNorm.
        transformed = self.dropout2(self.ffn(after_attention), training=training)
        return self.layernorm2(after_attention + transformed)

class TransformerEncoder(tf.keras.layers.Layer):
    """
    Encoder stack: token embedding scaled by sqrt(embedding_dim), plus
    sinusoidal positional encoding, dropout, then `num_encoder_layers`
    TransformerEncoderLayer blocks applied in sequence.
    """

    def __init__(self, input_vocab_size, max_positional_encoding_input, num_encoder_layers,
                 embedding_dim, num_heads, ffn_dim, dropout_rate=0.1, layernorm_eps=1e-6):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.num_layers = num_encoder_layers
        self.embedding = Embedding(input_vocab_size, self.embedding_dim)
        # Fixed (non-trainable) table of shape (1, max_positions, embedding_dim).
        self.pos_encoding = positional_encoding(max_positional_encoding_input, self.embedding_dim)
        self.encoder_layers = [
            TransformerEncoderLayer(embedding_dim, num_heads, ffn_dim, dropout_rate, layernorm_eps)
            for _ in range(self.num_layers)
        ]
        self.dropout = Dropout(dropout_rate)

    def call(self, x, enc_padding_mask, training):
        """
        Arguments:
        x -- integer token ids; axis 1 is the sequence axis
        enc_padding_mask -- attention mask forwarded to every encoder layer
        training -- whether dropout is active

        Returns:
        output of the last encoder layer
        """
        seq_len = tf.shape(x)[1]
        scale = tf.math.sqrt(tf.cast(self.embedding_dim, tf.float32))
        x = self.embedding(x) * scale
        # Only the first seq_len rows of the positional table are needed.
        x = x + self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)
        for encoder_layer in self.encoder_layers:
            x = encoder_layer(x, enc_padding_mask, training=training)
        return x

class TransformerDecoderLayer(tf.keras.layers.Layer):
    """
    One decoder block: masked self-attention, encoder-decoder (cross)
    attention and a feed-forward network. Each sub-layer is followed by
    dropout, a residual connection and layer normalisation.
    """

    def __init__(self, embedding_dim, num_heads, ffn_dim, dropout_rate=0.1, layernorm_eps=1e-6):
        super().__init__()
        per_head_dim = embedding_dim // num_heads
        self.self_attention = MultiHeadAttention(
            num_heads=num_heads, key_dim=per_head_dim, dropout=dropout_rate)
        self.cross_attention = MultiHeadAttention(
            num_heads=num_heads, key_dim=per_head_dim, dropout=dropout_rate)
        self.layernorm1 = LayerNormalization(epsilon=layernorm_eps)
        self.layernorm2 = LayerNormalization(epsilon=layernorm_eps)
        self.layernorm3 = LayerNormalization(epsilon=layernorm_eps)
        self.ffn = FullyConnected(embedding_dim=embedding_dim, fully_connected_dim=ffn_dim)
        self.dropout1 = Dropout(dropout_rate)
        self.dropout2 = Dropout(dropout_rate)
        self.dropout3 = Dropout(dropout_rate)

    def call(self, x, enc_output, enc_padding_mask, dec_lookahead_mask, training):
        """
        Arguments:
        x -- decoder-side sequence representation
        enc_output -- encoder output, used as value and key of the cross-attention
        enc_padding_mask -- attention mask for the cross-attention
        dec_lookahead_mask -- attention mask for the decoder self-attention
        training -- whether dropout is active

        Returns:
        (block output, self-attention scores, cross-attention scores)
        """
        # 1. Masked self-attention over the decoder sequence.
        self_attn_out, self_attn_scores = self.self_attention(
            x, x, x, attention_mask=dec_lookahead_mask,
            training=training, return_attention_scores=True)
        self_attn_out = self.dropout1(self_attn_out, training=training)
        after_self_attn = self.layernorm1(x + self_attn_out)

        # 2. Cross-attention: decoder states query the encoder output.
        cross_attn_out, cross_attn_scores = self.cross_attention(
            after_self_attn, enc_output, enc_output,
            attention_mask=enc_padding_mask,
            training=training, return_attention_scores=True)
        cross_attn_out = self.dropout2(cross_attn_out, training=training)
        after_cross_attn = self.layernorm2(after_self_attn + cross_attn_out)

        # 3. Position-wise feed-forward network.
        ffn_out = self.dropout3(self.ffn(after_cross_attn), training=training)
        block_out = self.layernorm3(after_cross_attn + ffn_out)
        return block_out, self_attn_scores, cross_attn_scores

class TransformerDecoder(tf.keras.layers.Layer):
    """
    Decoder stack: token embedding scaled by sqrt(embedding_dim), plus
    sinusoidal positional encoding, dropout, then `num_decoder_layers`
    TransformerDecoderLayer blocks applied in sequence.
    """

    def __init__(self, target_vocab_size, max_positional_encoding_target, num_decoder_layers, embedding_dim,
            num_heads, ffn_dim, dropout_rate=0.1, layernorm_eps=1e-6):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.num_layers = num_decoder_layers
        self.embedding = Embedding(target_vocab_size, self.embedding_dim)
        # Fixed (non-trainable) table of shape (1, max_positions, embedding_dim).
        self.pos_encoding = positional_encoding(max_positional_encoding_target, self.embedding_dim)
        # layernorm_eps is forwarded so the decoder honours the caller's setting,
        # exactly as the encoder does (it used to be silently dropped here).
        self.decoder_layers = [
            TransformerDecoderLayer(embedding_dim, num_heads, ffn_dim, dropout_rate, layernorm_eps)
            for _ in range(self.num_layers)
        ]
        self.dropout = Dropout(dropout_rate)

    def call(self, x, enc_output, enc_padding_mask, dec_lookahead_mask, training):
        """
        Arguments:
        x -- integer target token ids; axis 1 is the sequence axis
        enc_output -- encoder output forwarded to every decoder layer
        enc_padding_mask -- attention mask for the cross-attention
        dec_lookahead_mask -- attention mask for the decoder self-attention
        training -- whether dropout is active

        Returns:
        (output of the last decoder layer, dict of attention scores keyed
        "decoder_layer{n}_attn1" / "decoder_layer{n}_attn2" with n starting at 1)
        """
        seq_len = tf.shape(x)[1]
        x = self.embedding(x)
        x *= tf.math.sqrt(tf.cast(self.embedding_dim, tf.float32))
        # Only the first seq_len rows of the positional table are needed.
        x += self.pos_encoding[:, :seq_len, :]
        x = self.dropout(x, training=training)
        attention_weights = {}
        for i, layer in enumerate(self.decoder_layers):
            x, attn1, attn2 = layer(x, enc_output, enc_padding_mask, dec_lookahead_mask, training=training)
            attention_weights[f"decoder_layer{i+1}_attn1"] = attn1
            attention_weights[f"decoder_layer{i+1}_attn2"] = attn2
        return x, attention_weights

class Transformer(tf.keras.Model):
    """
    Encoder-decoder Transformer: a TransformerEncoder, a TransformerDecoder and
    a final Dense layer that projects decoder states to target-vocabulary logits.
    """

    def __init__(self, input_vocab_size, target_vocab_size, max_positional_encoding_input,
           max_positional_encoding_target, num_encoder_layers, num_decoder_layers, embedding_dim,
            num_heads, ffn_dim,dropout_rate=0.1, layernorm_eps=1e-6):
        super().__init__()
        # Hyper-parameters that the encoder and the decoder share.
        shared_kwargs = dict(
            embedding_dim=embedding_dim,
            num_heads=num_heads,
            ffn_dim=ffn_dim,
            dropout_rate=dropout_rate,
            layernorm_eps=layernorm_eps,
        )
        self.encoder = TransformerEncoder(
            input_vocab_size=input_vocab_size,
            max_positional_encoding_input=max_positional_encoding_input,
            num_encoder_layers=num_encoder_layers,
            **shared_kwargs)
        self.decoder = TransformerDecoder(
            target_vocab_size=target_vocab_size,
            max_positional_encoding_target=max_positional_encoding_target,
            num_decoder_layers=num_decoder_layers,
            **shared_kwargs)
        self.final_layer = Dense(target_vocab_size)

    def call(self, enc_input, dec_input, enc_padding_mask, dec_lookahead_mask, training):
        """
        Arguments:
        enc_input -- source token ids
        dec_input -- target token ids fed to the decoder
        enc_padding_mask -- mask used by the encoder and by the decoder's cross-attention
        dec_lookahead_mask -- mask used by the decoder's self-attention
        training -- whether dropout is active

        Returns:
        (logits over the target vocabulary, dict of decoder attention scores)
        """
        encoded = self.encoder(enc_input, enc_padding_mask, training=training)
        decoded, attention_weights = self.decoder(
            dec_input, encoded, enc_padding_mask, dec_lookahead_mask, training=training)
        # No softmax here: the loss is configured to consume raw logits.
        return self.final_layer(decoded), attention_weights

In [None]:
# Take the first three examples as a tiny batch for a smoke test of the model.
x = tf.convert_to_tensor(X[:3], dtype=tf.int32)
print("Encoder Input's shape =", x.shape)   # (3, 30)
enc_padding_mask = create_padding_mask_nmt(x)
print("enc_padding_mask's shape =", enc_padding_mask.shape)  # (3, 1, 1, 30)

# Reserve two extra target-vocabulary ids for the start/end markers. The ids
# are defined once and reused so the vocabulary and the arrays cannot drift apart.
sos_id = 11
eos_id = 12
machine_vocab["<sos>"] = sos_id
machine_vocab["<eos>"] = eos_id

# Decoder input: the full target with <sos> prepended.
Y_input = np.concatenate([np.full((Y.shape[0], 1), sos_id), Y], axis=1)
print("Y_input's shape =", Y_input.shape)
# Label: the full target with <eos> appended (same length as Y_input).
Y_target = np.concatenate([Y, np.full((Y.shape[0], 1), eos_id)], axis=1)
print("Y_target's shape =", Y_target.shape)

y = tf.convert_to_tensor(Y_input[:3], dtype=tf.int32)
print("Decoder Input's shape =", y.shape)    # (3, 11)
# Size the causal mask from the decoder input rather than a hard-coded 11.
dec_lookahead_mask = create_look_ahead_mask(y.shape[1])
print("dec_lookahead_mask's shape =", dec_lookahead_mask.shape)  # (1, 11, 11)

Encoder Input's shape = (3, 30)
enc_padding_mask's shape = (3, 1, 1, 30)
Y_input's shape = (10000, 11)
Y_target's shape = (10000, 11)
Decoder Input's shape = (3, 11)
dec_lookahead_mask's shape = (1, 11, 11)


In [None]:
# Show both vocabularies; machine_vocab now includes the <sos>/<eos> entries.
print(human_vocab, machine_vocab, sep="\n")

{' ': 0, '.': 1, '/': 2, '0': 3, '1': 4, '2': 5, '3': 6, '4': 7, '5': 8, '6': 9, '7': 10, '8': 11, '9': 12, 'a': 13, 'b': 14, 'c': 15, 'd': 16, 'e': 17, 'f': 18, 'g': 19, 'h': 20, 'i': 21, 'j': 22, 'l': 23, 'm': 24, 'n': 25, 'o': 26, 'p': 27, 'r': 28, 's': 29, 't': 30, 'u': 31, 'v': 32, 'w': 33, 'y': 34, '<unk>': 35, '<pad>': 36}
{'-': 0, '0': 1, '1': 2, '2': 3, '3': 4, '4': 5, '5': 6, '6': 7, '7': 8, '8': 9, '9': 10, '<sos>': 11, '<eos>': 12}


In [None]:
# Vocabulary sizes are read from the dictionaries. machine_vocab must already
# contain <sos>/<eos> (added in an earlier cell) for the target size to be right.
input_vocab_size = len(human_vocab)
target_vocab_size = len(machine_vocab)

# Small configuration, used only to check that the model runs end to end.
transformer = Transformer(
    input_vocab_size=input_vocab_size,
    target_vocab_size=target_vocab_size,
    max_positional_encoding_input=30,
    max_positional_encoding_target=11,
    num_encoder_layers=2,
    num_decoder_layers=2,
    embedding_dim=32,
    num_heads=8,
    ffn_dim=64,
    dropout_rate=0.1,
    layernorm_eps=1e-6,
)

logits, attn_weights = transformer(
    enc_input=x,
    dec_input=y,
    enc_padding_mask=enc_padding_mask,
    dec_lookahead_mask=dec_lookahead_mask,
    training=True,
)
# Expected: (batch_size, target_seq_len, target_vocab_size) = (3, 11, 13)
print(logits.shape)

(3, 11, 13)


In [None]:
# Per-token cross-entropy on raw logits. reduction='none' keeps one loss value
# per target position so positions can be masked before averaging.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

def loss_function(real, pred, eos_token_id=12):
    """
    Masked cross-entropy averaged over the positions that are kept.

    Arguments:
    real -- integer target ids
    pred -- logits with one score per target-vocabulary entry on the last axis
    eos_token_id -- target id whose positions are excluded from the loss

    Returns:
    scalar tensor: sum of the unmasked per-token losses divided by the number
    of unmasked tokens (0 when every position is masked)
    """
    # NOTE(review): positions whose label is <eos> are masked out, so the model
    # gets no training signal for emitting <eos> -- confirm this is intended.
    mask = tf.cast(real != eos_token_id, dtype=tf.float32)
    loss_ = loss_object(real, pred) * mask
    # Normalise by the number of kept tokens. A plain reduce_mean would also
    # divide by the masked positions and under-report the loss.
    return tf.math.divide_no_nan(tf.reduce_sum(loss_), tf.reduce_sum(mask))

In [None]:
def train_step(enc_input, dec_input, dec_output, enc_padding_mask, dec_lookahead_mask):
    """
    Runs one optimisation step on a single batch (eager version).

    Uses the notebook-level `transformer`, `loss_function` and `optimizer`.

    Arguments:
    enc_input -- source token ids
    dec_input -- token ids fed to the decoder
    dec_output -- label token ids the decoder should produce
    enc_padding_mask -- padding mask for the source sequence
    dec_lookahead_mask -- causal mask for the decoder self-attention

    Returns:
    the loss of this batch
    """
    with tf.GradientTape() as tape:
        step_logits, _ = transformer(
            enc_input, dec_input, enc_padding_mask, dec_lookahead_mask, training=True)
        batch_loss = loss_function(dec_output, step_logits, eos_token_id=12)
    variables = transformer.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    return batch_loss

In [None]:
# Adam with its default hyper-parameters; train_step reads this global.
optimizer = tf.keras.optimizers.Adam()

In [None]:
# Labels for the three-example batch, shape (3, 11).
y_true = tf.convert_to_tensor(Y_target[:3], dtype=tf.int32)

# Repeat a gradient step on the same tiny batch to check that the loss goes
# down. Each "epoch" here is a single step; BATCH_SIZE is not read by the loop.
EPOCHS = 10
BATCH_SIZE = 3
for epoch in range(EPOCHS):
    # Argument order: encoder input, decoder input, labels, padding mask, causal mask.
    loss = train_step(x, y, y_true, enc_padding_mask, dec_lookahead_mask)
    print(f"Epoch {epoch+1} Loss: {loss.numpy():.2f}")

Epoch 1 Loss: 2.73
Epoch 2 Loss: 2.52
Epoch 3 Loss: 2.39
Epoch 4 Loss: 2.19
Epoch 5 Loss: 2.11
Epoch 6 Loss: 1.96
Epoch 7 Loss: 1.88
Epoch 8 Loss: 1.76
Epoch 9 Loss: 1.75
Epoch 10 Loss: 1.60


In [None]:
def string_to_int(string, length, vocab):
    """
    Converts a string into a fixed-length list of vocabulary indices, one per
    character.

    The string is lower-cased and commas are removed before the lookup. It is
    then cut to `length` characters, or padded with vocab['<pad>'] up to
    `length`.

    Arguments:
    string -- input string, e.g. 'Wed 10 Jul 2007'
    length -- the number of time steps you'd like, determines if the output will be padded or cut
    vocab -- vocabulary, dictionary used to index every character of your "string"

    Returns:
    rep -- list (size = length) with the vocabulary index of each character.
           Characters missing from the vocabulary map to vocab['<unk>'], or to
           the literal string '<unk>' only if the vocabulary has no such entry.
    """

    # Normalise: lower-case and drop commas.
    string = string.lower()
    string = string.replace(',','')

    if len(string) > length:
        string = string[:length]

    # Unknown characters must become the <unk> *index*. Returning the string
    # '<unk>' would put a str among the ints and break tensor conversion.
    unk = vocab.get('<unk>', '<unk>')
    rep = [vocab.get(ch, unk) for ch in string]

    if len(string) < length:
        rep += [vocab['<pad>']] * (length - len(string))

    return rep

In [None]:
def indices_to_date(indices, inv_machine_vocab, eos_id=12):
    """
    Decodes a sequence of target ids into a string, stopping at the first
    end-of-sequence id.

    Arguments:
    indices -- iterable of target ids
    inv_machine_vocab -- dictionary mapping id -> character
    eos_id -- id that ends the sequence; it and everything after it are dropped

    Returns:
    the decoded string; ids missing from the dictionary become '<unk>'
    """
    text = ''
    for token_id in indices:
        if token_id == eos_id:
            return text
        text += inv_machine_vocab.get(token_id, '<unk>')
    return text

def predict(transformer, enc_input, max_target_len, sos_id, eos_id, target_vocab_size):
    """
    Greedy autoregressive decoding for a batch of source sequences.

    Arguments:
    transformer -- a Transformer as defined in this notebook (its `encoder`,
                   `decoder` and `final_layer` attributes are used)
    enc_input -- source token ids, one row per example
    max_target_len -- maximum number of tokens to generate
    sos_id -- id that starts every decoder input
    eos_id -- id that marks the end of a generated sequence
    target_vocab_size -- unused; kept so existing calls keep working

    Returns:
    int array with one row per example and one column per generated step (at
    most max_target_len columns). Rows may still contain ids after their first
    eos_id; indices_to_date cuts them off.
    """
    batch_size = tf.shape(enc_input)[0]
    enc_padding_mask = create_padding_mask_nmt(enc_input)
    # The encoder output does not depend on the decoding step, so compute it
    # once instead of re-running the encoder for every generated token.
    enc_output = transformer.encoder(enc_input, enc_padding_mask, training=False)
    dec_input = tf.ones((batch_size, 1), dtype=tf.int32) * sos_id
    output = []
    finished = None  # per-row flag: has this row already produced eos_id?

    for _ in range(max_target_len):
        dec_mask = create_look_ahead_mask(tf.shape(dec_input)[1])
        dec_output, _ = transformer.decoder(
            dec_input, enc_output, enc_padding_mask, dec_mask, training=False)
        logits = transformer.final_layer(dec_output)
        # Greedy choice from the scores of the last position only.
        predicted_id = tf.argmax(logits[:, -1, :], axis=-1, output_type=tf.int32)
        step_ids = predicted_id.numpy()
        output.append(step_ids)
        # Stop once every row has produced eos_id at some step, not only when
        # all rows happen to produce it on the same step.
        hit_eos = step_ids == eos_id
        finished = hit_eos if finished is None else (finished | hit_eos)
        if finished.all():
            break
        dec_input = tf.concat([dec_input, predicted_id[:, tf.newaxis]], axis=1)

    if not output:
        # max_target_len <= 0: nothing was generated, and np.stack rejects an empty list.
        return np.zeros((int(batch_size), 0), dtype=np.int32)
    return np.stack(output, axis=1)

In [None]:
# Inference smoke test on the first three training examples, shape (3, 30).
x_test = tf.convert_to_tensor(X[:3], dtype=tf.int32)
predictions = predict(
    transformer,
    x_test,
    max_target_len=10,
    sos_id=11,
    eos_id=12,
    target_vocab_size=len(machine_vocab),
)



In [None]:
# Decode every prediction and print it next to its source and reference date.
for i in range(len(predictions)):
    source_date = dataset[i][0]
    predicted_date = indices_to_date(predictions[i], inv_machine_vocab, eos_id=12)
    print(f"源日期: {source_date}", f"预测日期: {predicted_date}", sep="\n")
    print("len(predicted_date) =", len(predicted_date))
    # end="\n\n" leaves a blank line between examples.
    print(f"目标日期: {indices_to_date(Y_target[i], inv_machine_vocab, eos_id=12)}", end="\n\n")

源日期: 9 may 1998
预测日期: 009-111099
len(predicted_date) = 10
目标日期: 1998-05-09

源日期: 10.11.19
预测日期: 199-111199
len(predicted_date) = 10
目标日期: 2019-11-10

源日期: 9/10/70
预测日期: 009-111099
len(predicted_date) = 10
目标日期: 1970-09-10



In [None]:
# Convert the whole training set this time (m examples).
x = tf.convert_to_tensor(X, dtype=tf.int32)
print("Encoder Input's shape =", x.shape)   # (m, 30)
enc_padding_mask = create_padding_mask_nmt(x)
print("enc_padding_mask's shape =", enc_padding_mask.shape)  # (m, 1, 1, 30)

# Reserve two extra target-vocabulary ids for the start/end markers. The ids
# are defined once and reused so the vocabulary and the arrays cannot drift apart.
sos_id = 11
eos_id = 12
machine_vocab["<sos>"] = sos_id
machine_vocab["<eos>"] = eos_id

# Decoder input: the full target with <sos> prepended.
Y_input = np.concatenate([np.full((Y.shape[0], 1), sos_id), Y], axis=1)
print("Y_input's shape =", Y_input.shape)
# Label: the full target with <eos> appended (same length as Y_input).
Y_target = np.concatenate([Y, np.full((Y.shape[0], 1), eos_id)], axis=1)
print("Y_target's shape =", Y_target.shape)

y = tf.convert_to_tensor(Y_input, dtype=tf.int32)
print("Decoder Input's shape =", y.shape)    # (m, 11)
# Size the causal mask from the decoder input rather than a hard-coded 11.
dec_lookahead_mask = create_look_ahead_mask(y.shape[1])
print("dec_lookahead_mask's shape =", dec_lookahead_mask.shape)  # (1, 11, 11)

Encoder Input's shape = (10000, 30)
enc_padding_mask's shape = (10000, 1, 1, 30)
Y_input's shape = (10000, 11)
Y_target's shape = (10000, 11)
Decoder Input's shape = (10000, 11)
dec_lookahead_mask's shape = (1, 11, 11)


In [None]:
# Labels for the full training set, shape (m, 11).
y_true = tf.convert_to_tensor(Y_target, dtype=tf.int32)

BATCH_SIZE = 32
BUFFER_SIZE = 10000

# Each element is (encoder input, decoder input, label, padding mask). The
# padding mask is sliced per example together with the inputs.
dataset2 = (
    tf.data.Dataset.from_tensor_slices((x, y, y_true, enc_padding_mask))
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Full-size model for the real training run. This replaces the small smoke-test
# model bound to the same name earlier.
transformer = Transformer(
    input_vocab_size=input_vocab_size,
    target_vocab_size=target_vocab_size,
    max_positional_encoding_input=30,
    max_positional_encoding_target=11,
    num_encoder_layers=2,
    num_decoder_layers=2,
    embedding_dim=128,
    num_heads=8,
    ffn_dim=256,
    dropout_rate=0.1,
    layernorm_eps=1e-6,
)

In [None]:
# Run one forward pass on a small fake batch so the model creates its variables.
dummy_x = tf.random.uniform(shape=(1, 30), minval=0, maxval=input_vocab_size, dtype=tf.int32)
dummy_y = tf.random.uniform(shape=(1, 11), minval=0, maxval=target_vocab_size, dtype=tf.int32)

dummy_enc_mask = create_padding_mask_nmt(dummy_x)
dummy_lookahead_mask = create_look_ahead_mask(11)

# The outputs are discarded; only the side effect of building the model matters.
_ = transformer(
    enc_input=dummy_x,
    dec_input=dummy_y,
    enc_padding_mask=dummy_enc_mask,
    dec_lookahead_mask=dummy_lookahead_mask,
    training=False,
)

In [None]:
# Fresh Adam optimizer for the full training run, with an explicit learning
# rate of 1e-4. It replaces the optimizer created for the smoke test.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

In [None]:
@tf.function
def train_step(enc_input, dec_input, dec_output, enc_padding_mask, dec_lookahead_mask):
    """
    Runs one optimisation step on a single batch (graph-compiled version).

    This redefinition shadows the eager train_step defined earlier. It uses the
    notebook-level `transformer`, `loss_function` and `optimizer`.

    Arguments:
    enc_input -- source token ids
    dec_input -- token ids fed to the decoder
    dec_output -- label token ids the decoder should produce
    enc_padding_mask -- padding mask for the source sequence
    dec_lookahead_mask -- causal mask for the decoder self-attention

    Returns:
    the loss of this batch
    """
    with tf.GradientTape() as tape:
        step_logits, _ = transformer(
            enc_input, dec_input, enc_padding_mask, dec_lookahead_mask, training=True)
        # Masked cross-entropy from the notebook's loss_function (default eos id).
        batch_loss = loss_function(dec_output, step_logits)
    variables = transformer.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    return batch_loss

In [None]:
EPOCHS = 18
for epoch in range(EPOCHS):
    total_loss = 0
    num_batches = 0
    # enumerate(..., start=1) keeps the running batch count in num_batches.
    for num_batches, (batch_x, batch_y, batch_y_true, batch_enc_padding_mask) in enumerate(dataset2, start=1):
        # The same look-ahead mask is reused for every batch because the
        # decoder input length does not change.
        loss = train_step(batch_x, batch_y, batch_y_true,
                          batch_enc_padding_mask, dec_lookahead_mask)
        total_loss += loss
    # Report the mean batch loss of this epoch.
    print(f"Epoch {epoch+1} Loss: {total_loss / num_batches:.2f}")

Epoch 1 Loss: 1.10
Epoch 2 Loss: 0.36
Epoch 3 Loss: 0.14
Epoch 4 Loss: 0.10
Epoch 5 Loss: 0.08
Epoch 6 Loss: 0.06
Epoch 7 Loss: 0.05
Epoch 8 Loss: 0.04
Epoch 9 Loss: 0.04
Epoch 10 Loss: 0.03
Epoch 11 Loss: 0.03
Epoch 12 Loss: 0.02
Epoch 13 Loss: 0.02
Epoch 14 Loss: 0.02
Epoch 15 Loss: 0.02
Epoch 16 Loss: 0.02
Epoch 17 Loss: 0.01
Epoch 18 Loss: 0.01


In [None]:
# Hand-written dates in assorted human formats.
EXAMPLES = [
    '3 May 1979',
    '5 April 09',
    '21th of August 2016',
    'Tue 10 Jul 2007',
    'Saturday May 9 2018',
    'March 3 2001',
    'March 3rd 2001',
    '1 March 2001',
]

source_list = []
for example in EXAMPLES:
    # Wrapping the id list in an outer list yields a (1, 30) row directly.
    temp = tf.convert_to_tensor([string_to_int(example, 30, human_vocab)], dtype=tf.int32)
    source_list.append(temp)
# Stack the rows into one (num_examples, 30) batch.
source_output = tf.concat(source_list, axis=0)
print("Shape:", source_output.shape)
print(source_output)

Shape: (8, 30)
tf.Tensor(
[[ 6  0 24 13 34  0  4 12 10 12 36 36 36 36 36 36 36 36 36 36 36 36 36 36
  36 36 36 36 36 36]
 [ 8  0 13 27 28 21 23  0  3 12 36 36 36 36 36 36 36 36 36 36 36 36 36 36
  36 36 36 36 36 36]
 [ 5  4 30 20  0 26 18  0 13 31 19 31 29 30  0  5  3  4  9 36 36 36 36 36
  36 36 36 36 36 36]
 [30 31 17  0  4  3  0 22 31 23  0  5  3  3 10 36 36 36 36 36 36 36 36 36
  36 36 36 36 36 36]
 [29 13 30 31 28 16 13 34  0 24 13 34  0 12  0  5  3  4 11 36 36 36 36 36
  36 36 36 36 36 36]
 [24 13 28 15 20  0  6  0  5  3  3  4 36 36 36 36 36 36 36 36 36 36 36 36
  36 36 36 36 36 36]
 [24 13 28 15 20  0  6 28 16  0  5  3  3  4 36 36 36 36 36 36 36 36 36 36
  36 36 36 36 36 36]
 [ 4  0 24 13 28 15 20  0  5  3  3  4 36 36 36 36 36 36 36 36 36 36 36 36
  36 36 36 36 36 36]], shape=(8, 30), dtype=int32)


In [None]:
# Run greedy decoding on the hand-written examples.
predictions2 = predict(
    transformer,
    source_output,
    max_target_len=10,
    sos_id=11,
    eos_id=12,
    target_vocab_size=len(machine_vocab),
)



In [None]:
# Decode every prediction and print it next to the original example string.
for i in range(len(predictions2)):
    source_date = EXAMPLES[i]
    predicted_date = indices_to_date(predictions2[i], inv_machine_vocab, eos_id=12)
    print(f"源日期: {source_date}", f"预测日期: {predicted_date}", sep="\n")
    # end="\n\n" leaves a blank line between examples.
    print("len(predicted_date) =", len(predicted_date), end="\n\n")

源日期: 3 May 1979
预测日期: 1979-05-03
len(predicted_date) = 10

源日期: 5 April 09
预测日期: 2009-04-05
len(predicted_date) = 10

源日期: 21th of August 2016
预测日期: 2016-08-21
len(predicted_date) = 10

源日期: Tue 10 Jul 2007
预测日期: 2007-07-10
len(predicted_date) = 10

源日期: Saturday May 9 2018
预测日期: 2018-05-09
len(predicted_date) = 10

源日期: March 3 2001
预测日期: 2001-03-03
len(predicted_date) = 10

源日期: March 3rd 2001
预测日期: 2001-03-31
len(predicted_date) = 10

源日期: 1 March 2001
预测日期: 2001-03-01
len(predicted_date) = 10



In [None]:
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open('/content/drive/MyDrive/Colab Notebooks/test_sameasLSTM_variables.pkl', 'rb') as f:
    data = pickle.load(f)

# Unpack the held-out test objects. `data` now refers to the test pickle, no
# longer to the training pickle loaded earlier.
(test_sameasLSTM_dataset, test_sameasLSTM_human_vocab,
 test_sameasLSTM_machine_vocab, test_sameasLSTM_inv_machine_vocab) = (
    data[key]
    for key in ('test_sameasLSTM_dataset', 'test_sameasLSTM_human_vocab',
                'test_sameasLSTM_machine_vocab', 'test_sameasLSTM_inv_machine_vocab')
)

In [None]:
# The first element of every test pair is the human-readable source date.
test_sameasLSTM_EXAMPLES = [pair[0] for pair in test_sameasLSTM_dataset]

source_list = []
for example in test_sameasLSTM_EXAMPLES:
    # Encoded with the training vocabulary (human_vocab); wrapping the id list
    # in an outer list yields a (1, 30) row directly.
    temp = tf.convert_to_tensor([string_to_int(example, 30, human_vocab)], dtype=tf.int32)
    source_list.append(temp)
# Stack the rows into one (num_examples, 30) batch.
source_output = tf.concat(source_list, axis=0)
print("Shape:", source_output.shape)

Shape: (2000, 30)


In [None]:
# Run greedy decoding on the whole test set as a single batch.
predictions3 = predict(
    transformer,
    source_output,
    max_target_len=10,
    sos_id=11,
    eos_id=12,
    target_vocab_size=len(machine_vocab),
)



In [None]:
# Decode every predicted id sequence into a date string.
predicted_dates = [
    indices_to_date(predictions3[row], inv_machine_vocab, eos_id=12)
    for row in range(len(predictions3))
]

# The second element of every test pair is the reference date.
test_sameasLSTM_GROUND_TRUTH = [pair[1] for pair in test_sameasLSTM_dataset]

# Exact match: a prediction counts only if the whole string equals the reference.
correct = sum(1 for p, t in zip(predicted_dates, test_sameasLSTM_GROUND_TRUTH) if p == t)
accuracy = correct / len(test_sameasLSTM_GROUND_TRUTH)
print(f"\n准确率（Exact Match Accuracy）: {accuracy:.2%}")


准确率（Exact Match Accuracy）: 98.50%


In [None]:
# Save the trained weights to a file in the current working directory.
transformer.save_weights("transformer_epoch18.weights.h5")

In [None]:
# Colab-only helper, imported here rather than in the import cell so the rest
# of the notebook does not depend on google.colab.
from google.colab import files
# Ask the browser to download the weights file saved above.
files.download("transformer_epoch18.weights.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>