In [2]:
import pandas as pd 
import numpy as np 
import os
import data_embedding, data_read, modules_test
import random
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, LayerNormalization, Conv1D, Flatten
from sklearn.preprocessing import OneHotEncoder
import time


Total data volume: 23436
Train data volume: 16404 Valuate data volume: 4686 Teat data volume: 2343


In [3]:
seq_length = 1024
line_num = 1000

# Data
X_data, y_data = data_read.data_read(seq_length, line_num)
X_data = data_embedding.data_embedding(X_data, seq_length)
print("Total data volume: {}".format(len(X_data)))

# Shuffle
Data = list(zip(X_data, y_data))
random.shuffle(Data)
X_data, y_data = zip(*Data)
X_data, y_data = np.array(X_data), np.array(y_data)

# Data split
X_train, y_train = X_data[0:int(len(X_data)*0.7)-1], y_data[0:int(len(y_data)*0.7)-1]
X_valuate, y_valuate = X_data[int(len(X_data)*0.7):int(len(X_data)*0.9)-1], y_data[int(len(X_data)*0.7):int(len(X_data)*0.9)-1]
X_test, y_test = X_data[int(len(X_data)*0.9):len(X_data)-1], y_data[int(len(X_data)*0.9):len(y_data)-1]
print("Train data volume: {}".format(len(X_train)), "Valuate data volume: {}".format(len(X_valuate)), "Teat data volume: {}".format(len(X_test)))


Total data volume: 23436
Train data volume: 16404 Valuate data volume: 4686 Teat data volume: 2343


In [4]:
# Hyperparameters
batch_size = 128
lr = 0.0001
hidden_units = seq_length // 8
maxlen = 8
num_blocks = 3
num_epochs = 300
num_heads = 8
dropout_rate = 0.1
lambda_loss_amount = 0.0015

class NormalizeLayer(tf.keras.layers.Layer):
    def __init__(self, epsilon=1e-8):
        super(NormalizeLayer, self).__init__()
        self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=epsilon)

    def call(self, inputs):
        return self.layer_norm(inputs)

class MultiheadAttentionLayer(tf.keras.layers.Layer):
    def __init__(self, num_units, num_heads=num_heads, dropout_rate=dropout_rate):
        super(MultiheadAttentionLayer, self).__init__()
        self.num_units = num_units
        self.num_heads = num_heads
        self.dropout_rate = dropout_rate
        self.dense_Q = tf.keras.layers.Dense(num_units, activation=tf.nn.relu)
        self.dense_K = tf.keras.layers.Dense(num_units, activation=tf.nn.relu)
        self.dense_V = tf.keras.layers.Dense(num_units, activation=tf.nn.relu)
        self.normalize = NormalizeLayer()

    def split_heads(self, x):
        return tf.concat(tf.split(x, self.num_heads, axis=-1), axis=0)

    def merge_heads(self, x):
        return tf.concat(tf.split(x, self.num_heads, axis=0), axis=-1)

    def call(self, queries, keys):
        Q = self.dense_Q(queries)
        K = self.dense_K(keys)
        V = self.dense_V(keys)

        Q_ = self.split_heads(Q)
        K_ = self.split_heads(K)
        V_ = self.split_heads(V)

        scores = tf.matmul(Q_, K_, transpose_b=True) / tf.math.sqrt(tf.cast(K_.shape[-1], tf.float32))
        attention_weights = tf.nn.softmax(scores)

        query_masks = tf.cast(tf.reduce_sum(tf.abs(queries), axis=-1, keepdims=True) > 0, tf.float32)
        query_masks = tf.tile(query_masks, [self.num_heads, 1, tf.shape(keys)[1]])
        attention_weights *= query_masks

        outputs = tf.matmul(attention_weights, V_)
        outputs = self.merge_heads(outputs)
        outputs += queries

        return self.normalize(outputs)

class FeedForwardLayer(tf.keras.layers.Layer):
    def __init__(self, num_units):
        super(FeedForwardLayer, self).__init__()
        self.conv1 = tf.keras.layers.Conv1D(filters=num_units[0], kernel_size=1, activation=tf.nn.relu, use_bias=True)
        self.conv2 = tf.keras.layers.Conv1D(filters=num_units[1], kernel_size=1, activation=None, use_bias=True)
        self.normalize = NormalizeLayer()

    def call(self, inputs):
        outputs = self.conv1(inputs)
        outputs = self.conv2(outputs)
        outputs += inputs
        return self.normalize(outputs)

def one_hot_encoding(y_):
    encoder = OneHotEncoder(sparse_output=False, categories='auto')
    y_ = y_.reshape(-1, 1)
    return encoder.fit_transform(y_)

class MultiheadAttentionLayerWrapper(tf.keras.layers.Layer):
    def __init__(self, num_units, num_heads=num_heads, dropout_rate=dropout_rate):
        super(MultiheadAttentionLayerWrapper, self).__init__()
        self.multihead_attention = MultiheadAttentionLayer(num_units, num_heads, dropout_rate)

    def call(self, inputs):
        queries, keys = inputs
        return self.multihead_attention(queries, keys)

# Define model
def build_model():
    inputs = tf.keras.Input(shape=(maxlen, hidden_units))
    enc = inputs
    for _ in range(num_blocks):
        enc = MultiheadAttentionLayerWrapper(hidden_units)([enc, enc])
    outputs = tf.keras.layers.Dense(6, activation='softmax')(tf.keras.layers.Flatten()(enc))
    return tf.keras.Model(inputs=inputs, outputs=outputs)

model = build_model()
loss_fn = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# Use existing data
y_train = one_hot_encoding(y_train)
y_val = one_hot_encoding(y_valuate)
y_test = one_hot_encoding(y_test)

# Define X_val
X_val = X_valuate

# Train the model
time_start = time.time()
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=batch_size, epochs=num_epochs)
time_end = time.time()
train_time = time_end - time_start
print(f"Training time: {train_time:.3f}s")

# Evaluate on test data
test_time_start = time.time()
test_loss, test_acc = model.evaluate(X_test, y_test)
test_time_end = time.time()
test_time = test_time_end - test_time_start
print(f"Test Accuracy: {test_acc:.5f}, Test Time: {test_time:.3f}s, Train Time: {train_time:.3f}s")

# Save the model
model.save("transformer_model.h5")


Epoch 1/300
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 44ms/step - accuracy: 0.2049 - loss: 2.1160 - val_accuracy: 0.3020 - val_loss: 1.7460
Epoch 2/300
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 42ms/step - accuracy: 0.3413 - loss: 1.6548 - val_accuracy: 0.4539 - val_loss: 1.4288
Epoch 3/300
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 44ms/step - accuracy: 0.4802 - loss: 1.3351 - val_accuracy: 0.5717 - val_loss: 1.1358
Epoch 4/300
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 46ms/step - accuracy: 0.5888 - loss: 1.0731 - val_accuracy: 0.6466 - val_loss: 0.8983
Epoch 5/300
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 43ms/step - accuracy: 0.6757 - loss: 0.8518 - val_accuracy: 0.7172 - val_loss: 0.7289
Epoch 6/300
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 43ms/step - accuracy: 0.7475 - loss: 0.6776 - val_accuracy: 0.7682 - val_loss: 0.6070
Epoch 7/300
[1



Test Accuracy: 0.97140, Test Time: 0.500s, Train Time: 1716.335s


In [5]:
%pip install tensorflow==2.9.1

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Define LSTM model
def create_lstm_model(input_shape, num_classes):
    model = Sequential()
    model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(128))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))
    return model

# Create and compile the model
lstm_model = create_lstm_model((X_train.shape[1], X_train.shape[2]), y_train.shape[1])
lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

start_LSTM_train_time = time.time()

# Train the model
lstm_model.fit(X_train, y_train, epochs=300, batch_size=batch_size, validation_data=(X_val, y_val))

end_LSTM_train_time = time.time()

# Evaluate the model

start_LSTM_test_time = time.time()
loss, accuracy = lstm_model.evaluate(X_test, y_test, batch_size=batch_size)
end_LSTM_test_time = time.time()
print(f"Test Accuracy: {accuracy:.4f}")
print(f'time taken to train LSTM model: {end_LSTM_train_time - start_LSTM_train_time}')
print(f'time taken to test LSTM model: {end_LSTM_test_time - start_LSTM_test_time}')

Note: you may need to restart the kernel to use updated packages.
Epoch 1/300


ERROR: Could not find a version that satisfies the requirement tensorflow==2.9.1 (from versions: 2.12.0rc0, 2.12.0rc1, 2.12.0, 2.12.1, 2.13.0rc0, 2.13.0rc1, 2.13.0rc2, 2.13.0, 2.13.1, 2.14.0rc0, 2.14.0rc1, 2.14.0, 2.14.1, 2.15.0rc0, 2.15.0rc1, 2.15.0, 2.15.1, 2.16.0rc0, 2.16.1, 2.16.2, 2.17.0rc0, 2.17.0rc1, 2.17.0, 2.17.1, 2.18.0rc0, 2.18.0rc1, 2.18.0rc2, 2.18.0, 2.18.1, 2.19.0rc0, 2.19.0)
ERROR: No matching distribution found for tensorflow==2.9.1
  super().__init__(**kwargs)


[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.2512 - loss: 1.7005 - val_accuracy: 0.5512 - val_loss: 1.1083
Epoch 2/300
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.6539 - loss: 0.9193 - val_accuracy: 0.8067 - val_loss: 0.5190
Epoch 3/300
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.8175 - loss: 0.5131 - val_accuracy: 0.8423 - val_loss: 0.4277
Epoch 4/300
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.8832 - loss: 0.3347 - val_accuracy: 0.9187 - val_loss: 0.2500
Epoch 5/300
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.9103 - loss: 0.2688 - val_accuracy: 0.9257 - val_loss: 0.2163
Epoch 6/300
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.9231 - loss: 0.2216 - val_accuracy: 0.9334 - val_loss: 0.1921
Epoch 7/300
[1m129/129[0m 

In [None]:
import tensorflow as tf
import numpy as np
import time
import data_read_2

# ---------------------- #
#       THAM SỐ        #
# ---------------------- #
seq_length = 1024
line_num = 1000

# ---------------------- #
#  ĐỌC VÀ XỬ LÝ DỮ LIỆU  #
# ---------------------- #
# Đọc dữ liệu (giả sử data_read_2 có các hàm data_read và data_embedding)
X_data, y_data, Label = data_read_2.data_read(seq_length, line_num)
X_data, y_data = data_read_2.data_embedding(X_data, y_data, seq_length)
print("Total data volume: {}".format(len(X_data)))

# Trộn dữ liệu: ghép X_data, y_data và Label lại với nhau rồi random.shuffle
Data = list(zip(X_data, y_data, Label))
random.shuffle(Data)
X_data, y_data, Label = zip(*Data)
X_data, y_data, Label = np.array(X_data), np.array(y_data), np.array(Label)

# Tách dữ liệu theo tỷ lệ Train 90%, Val 5%, Test 5%
num_total = len(X_data)
train_end = int(num_total * 0.9) - 1
val_end = int(num_total * 0.95) - 1

X_train = X_data[:train_end]
y_train = y_data[:train_end]

X_val = X_data[train_end:val_end]
y_val = y_data[train_end:val_end]

X_test = X_data[val_end:num_total-1]
y_test = y_data[val_end:num_total-1]
Label_test = Label[val_end:num_total-1]

print("Train data volume: {}, Val data volume: {}, Test data volume: {}"
      .format(len(X_train), len(X_val), len(X_test)))


# ---------------------- #
#    THAM SỐ HYPERPARAMS #
# ---------------------- #
batch_size = 128
lr = 0.0001
hidden_units = seq_length // 8
maxlen = 8
num_encoder_layers = 3
num_decoder_layers = 3
num_epochs = 300
num_heads = 8
dropout_rate = 0.1
lambda_loss_amount = 0.0015

# ---------------------- #
#  CHUẨN BỊ DỮ LIỆU    #
# ---------------------- #
# Ép kiểu và chuyển đổi dữ liệu sang numpy array kiểu float32
X_train = np.array(X_train, dtype=np.float32)
y_train = np.array(y_train, dtype=np.float32)
X_val   = np.array(X_val, dtype=np.float32)
y_val   = np.array(y_val, dtype=np.float32)
X_test  = np.array(X_test, dtype=np.float32)
y_test  = np.array(y_test, dtype=np.float32)

# Tạo tf.data.Dataset cho train, validation và test
train_inputs = (X_train, y_train)
train_targets = y_train
train_dataset = tf.data.Dataset.from_tensor_slices((train_inputs, train_targets))
train_dataset = train_dataset.shuffle(len(X_train)).batch(batch_size)

val_inputs = (X_val, y_val)
val_targets = y_val
val_dataset = tf.data.Dataset.from_tensor_slices((val_inputs, val_targets)).batch(batch_size)

test_inputs = (X_test, y_test)
test_targets = y_test
test_dataset = tf.data.Dataset.from_tensor_slices((test_inputs, test_targets)).batch(32)


# ---------------------- #
#    ĐỊNH NGHĨA MODEL   #
# ---------------------- #

# MultiHeadAttention custom layer
class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dropout_rate, causality=False):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        assert d_model % num_heads == 0, "d_model must be divisible by num_heads"
        self.d_k = d_model // num_heads
        
        self.linear_Q = tf.keras.layers.Dense(d_model)
        self.linear_K = tf.keras.layers.Dense(d_model)
        self.linear_V = tf.keras.layers.Dense(d_model)
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self.causality = causality

    def call(self, queries, keys, values, training):
        # queries, keys, values: shape (batch, seq_len, d_model)
        batch_size = tf.shape(queries)[0]
        Q = self.linear_Q(queries)  # (batch, seq_len, d_model)
        K = self.linear_K(keys)
        V = self.linear_V(values)
        
        # Tách ra theo số head: chuyển về shape (batch, num_heads, seq_len, d_k)
        def split_heads(x):
            x = tf.reshape(x, (batch_size, -1, self.num_heads, self.d_k))
            return tf.transpose(x, perm=[0, 2, 1, 3])
        Q = split_heads(Q)
        K = split_heads(K)
        V = split_heads(V)
        
        # Tính attention scores với scaled dot-product
        scores = tf.matmul(Q, K, transpose_b=True) / tf.math.sqrt(tf.cast(self.d_k, tf.float32))
        if self.causality:
            # Tạo mask causal: chỉ cho phép truy cập thông tin quá khứ
            seq_len = tf.shape(scores)[-1]
            mask = tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0)
            mask = tf.cast(mask, tf.bool)
            scores = tf.where(mask, scores, tf.fill(tf.shape(scores), -1e9))
        attn = tf.nn.softmax(scores, axis=-1)
        attn = self.dropout(attn, training=training)
        output = tf.matmul(attn, V)  # (batch, num_heads, seq_len, d_k)
        
        # Nối lại các head
        output = tf.transpose(output, perm=[0, 2, 1, 3])  # (batch, seq_len, num_heads, d_k)
        output = tf.reshape(output, (batch_size, -1, self.d_model))  # (batch, seq_len, d_model)
        
        # Kết hợp residual connection và layer normalization
        output = self.layer_norm(output + queries)
        return output

# FeedForward layer
class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, dropout_rate):
        super(FeedForward, self).__init__()
        self.linear1 = tf.keras.layers.Dense(4 * d_model)
        self.linear2 = tf.keras.layers.Dense(d_model)
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        
    def call(self, x, training):
        residual = x
        x = self.linear1(x)
        x = tf.nn.relu(x)
        x = self.linear2(x)
        x = self.dropout(x, training=training)
        x = self.layer_norm(x + residual)
        return x

# Encoder block
class EncoderBlock(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dropout_rate):
        super(EncoderBlock, self).__init__()
        self.mha = MultiHeadAttention(d_model, num_heads, dropout_rate, causality=False)
        self.ffn = FeedForward(d_model, dropout_rate)
        
    def call(self, x, training):
        x = self.mha(x, x, x, training=training)
        x = self.ffn(x, training=training)
        return x

# Decoder block
class DecoderBlock(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, dropout_rate):
        super(DecoderBlock, self).__init__()
        self.mha1 = MultiHeadAttention(d_model, num_heads, dropout_rate, causality=True)
        self.mha2 = MultiHeadAttention(d_model, num_heads, dropout_rate, causality=False)
        self.ffn = FeedForward(d_model, dropout_rate)
        
    def call(self, x, enc_output, training):
        x = self.mha1(x, x, x, training=training)
        x = self.mha2(x, enc_output, enc_output, training=training)
        x = self.ffn(x, training=training)
        return x

# Linear layer cuối cùng
class LinearLayer(tf.keras.layers.Layer):
    def __init__(self, d_model):
        super(LinearLayer, self).__init__()
        self.linear = tf.keras.layers.Dense(d_model)
        self.output_scale = self.add_weight(name="output_scale", shape=(), initializer=tf.keras.initializers.Ones())
        
    def call(self, x):
        return self.linear(x) * self.output_scale

# Mô hình Transformer chính
class TransformerModel(tf.keras.Model):
    def __init__(self, d_model, num_heads, num_encoder_layers, num_decoder_layers, dropout_rate, lambda_loss_amount):
        super(TransformerModel, self).__init__()
        self.encoder_layers = [EncoderBlock(d_model, num_heads, dropout_rate) for _ in range(num_encoder_layers)]
        self.decoder_layers = [DecoderBlock(d_model, num_heads, dropout_rate) for _ in range(num_decoder_layers)]
        self.linear_layer = LinearLayer(d_model)
        self.lambda_loss_amount = lambda_loss_amount
        
    def call(self, inputs, training=False):
        # inputs là một tuple chứa (encoder_input, decoder_input)
        encoder_input, decoder_input = inputs
        for enc in self.encoder_layers:
            encoder_input = enc(encoder_input, training=training)
        enc_output = encoder_input
        for dec in self.decoder_layers:
            decoder_input = dec(decoder_input, enc_output, training=training)
        pred = self.linear_layer(decoder_input)
        return pred


# ---------------------- #
#    KHỞI TẠO MODEL    #
# ---------------------- #
model = TransformerModel(d_model=hidden_units, num_heads=num_heads,
                         num_encoder_layers=num_encoder_layers,
                         num_decoder_layers=num_decoder_layers,
                         dropout_rate=dropout_rate,
                         lambda_loss_amount=lambda_loss_amount)

# Biên dịch model với optimizer và loss (MSE)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
              loss=tf.keras.losses.MeanSquaredError())

# ---------------------- #
#       HUẤN LUYỆN      #
# ---------------------- #
print("Training...\n")
start_time = time.time()
model.fit(train_dataset, epochs=num_epochs, validation_data=val_dataset, verbose=2)
end_time = time.time()
print(f"Training time: {end_time - start_time:.3f} seconds")

# Lưu model (chỉ lưu weights)
model_path = f'./DH2-{seq_length}.weights.h5'
model.save_weights(model_path)
print(f"Model saved as {model_path}")

# Đánh giá trên tập validation
val_loss = model.evaluate(val_dataset, verbose=0)
print(f"Validation Loss: {val_loss:.4f}")

# Tải lại model đã lưu để demo
model_loaded = TransformerModel(d_model=hidden_units, num_heads=num_heads,
                                num_encoder_layers=num_encoder_layers,
                                num_decoder_layers=num_decoder_layers,
                                dropout_rate=dropout_rate,
                                lambda_loss_amount=lambda_loss_amount)
# Đảm bảo xây dựng model bằng cách chạy một batch mẫu:
dummy_encoder = tf.random.uniform((1, X_train.shape[1], hidden_units))
dummy_decoder = tf.random.uniform((1, y_train.shape[1], hidden_units))
_ = model_loaded((dummy_encoder, dummy_decoder))
model_loaded.load_weights(model_path)

# ---------------------- #
#         KIỂM TRA      #
# ---------------------- #
test_time_start = time.time()
all_preds = []
for (enc_input, dec_input), target in test_dataset:
    preds = model_loaded((enc_input, dec_input), training=False)
    all_preds.append(preds)
all_preds = tf.concat(all_preds, axis=0)
test_time_end = time.time()
test_time = test_time_end - test_time_start


# Tính RMSE và MAE
mse = tf.keras.losses.MeanSquaredError()(y_test, all_preds)
rmse = tf.sqrt(mse).numpy()
mae = tf.keras.losses.MeanAbsoluteError()(y_test, all_preds).numpy()

print(f"The final RMSE = {rmse:.4f}, The final MAE = {mae:.4f}")
print(f"Test time: {test_time:.3f} seconds")


TensorFlow version: 2.19.0
23436 23998464 23997440
Total data volume: 23436
Train data volume: 21090, Val data volume: 1172, Test data volume: 1172
Training...

Epoch 1/300
165/165 - 36s - 217ms/step - loss: 0.1325 - val_loss: 0.0519
Epoch 2/300
165/165 - 23s - 140ms/step - loss: 0.0545 - val_loss: 0.0330
Epoch 3/300
165/165 - 24s - 146ms/step - loss: 0.0374 - val_loss: 0.0239
Epoch 4/300
165/165 - 23s - 141ms/step - loss: 0.0299 - val_loss: 0.0208
Epoch 5/300
165/165 - 24s - 147ms/step - loss: 0.0266 - val_loss: 0.0193
Epoch 6/300
165/165 - 24s - 145ms/step - loss: 0.0245 - val_loss: 0.0183
Epoch 7/300
165/165 - 24s - 146ms/step - loss: 0.0230 - val_loss: 0.0174
Epoch 8/300
165/165 - 24s - 148ms/step - loss: 0.0217 - val_loss: 0.0166
Epoch 9/300
165/165 - 24s - 146ms/step - loss: 0.0206 - val_loss: 0.0157
Epoch 10/300
165/165 - 24s - 148ms/step - loss: 0.0196 - val_loss: 0.0149
Epoch 11/300
165/165 - 24s - 147ms/step - loss: 0.0186 - val_loss: 0.0141
Epoch 12/300
165/165 - 25s - 150ms

In [7]:
# Tải lại model đã lưu để demo
model_loaded = TransformerModel(d_model=hidden_units, num_heads=num_heads,
                                num_encoder_layers=num_encoder_layers,
                                num_decoder_layers=num_decoder_layers,
                                dropout_rate=dropout_rate,
                                lambda_loss_amount=lambda_loss_amount)
# Đảm bảo xây dựng model bằng cách chạy một batch mẫu:
dummy_encoder = tf.random.uniform((1, X_train.shape[1], hidden_units))
dummy_decoder = tf.random.uniform((1, y_train.shape[1], hidden_units))
_ = model_loaded((dummy_encoder, dummy_decoder))
model_loaded.load_weights(model_path)


all_preds = []
for (enc_input, dec_input), target in test_dataset:
    preds = model_loaded((enc_input, dec_input), training=False)
    all_preds.append(preds)
all_preds = tf.concat(all_preds, axis=0)



In [8]:
all_preds

<tf.Tensor: shape=(1172, 8, 128), dtype=float32, numpy=
array([[[ 0.05106504, -0.13922325, -0.06660241, ...,  0.83283526,
         -0.12355666,  0.28127617],
        [ 0.67505556, -0.17277564,  0.38824514, ..., -0.03918505,
         -0.51959044, -0.2400989 ],
        [-0.2388668 , -0.41212264, -0.32615992, ..., -0.51913464,
         -0.5511497 , -0.26644287],
        ...,
        [-0.24811828, -0.41918743, -0.22707579, ..., -0.27531305,
         -0.47784632,  0.01447508],
        [-0.250187  , -0.08815622, -0.09252882, ...,  0.013631  ,
          0.14644887,  0.02065367],
        [ 0.19374362, -0.15211338,  0.08849527, ..., -0.24752322,
         -0.3602416 ,  0.05745627]],

       [[ 0.16510765,  0.12859018,  0.10617515, ...,  0.34806132,
          0.34531277,  0.43499717],
        [ 0.46757665,  0.45074418,  0.44492045, ..., -0.1327413 ,
         -0.09381447, -0.1485674 ],
        [-0.1388901 , -0.2078002 , -0.3584356 , ..., -0.25136536,
         -0.09517656, -0.13073829],
        ...

In [9]:
Label_test

array([[0],
       [2],
       [5],
       ...,
       [0],
       [1],
       [3]], dtype=int64)

In [10]:
import pandas as pd

# Chuyển Label_test thành DataFrame
df_label_test = pd.DataFrame(Label_test)

# Lưu DataFrame thành file CSV
df_label_test.to_csv('Label_preds.csv', index=False)

print("File Label_preds.csv đã được lưu thành công.")

File Label_preds.csv đã được lưu thành công.
