In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Cài đặt thư viện

In [2]:
import numpy as np
import pandas as pd
from string import digits
import tensorflow as tf
from pyvi import ViTokenizer
from tensorflow.keras.utils import to_categorical
%matplotlib inline

# Tải dữ liệu

In [3]:
data_train = pd.read_csv("vlsp_sentiment_train.csv", sep='\t')
data_train.columns =['Class', 'Data']
data_test = pd.read_csv("vlsp_sentiment_test.csv", sep='\t')
data_test.columns =['Class', 'Data']

In [4]:
print(data_train.shape)
print(data_test.shape)

(5100, 2)
(1050, 2)


## Dữ liệu train

In [5]:
labels = data_train.iloc[:, 0].values
reviews = data_train.iloc[:, 1].values

In [6]:
encoded_labels = []

for label in labels:
    if label == -1:
        encoded_labels.append([1,0,0])
    elif label == 0:
        encoded_labels.append([0,1,0])
    else:
        encoded_labels.append([0,0,1])

encoded_labels = np.array(encoded_labels)

In [7]:
reviews_processed = []
unlabeled_processed = []
for review in reviews:
    review_cool_one = ''.join([char for char in review if char not in digits])
    reviews_processed.append(review_cool_one)

In [8]:
#Use PyVi for Vietnamese word tokenizer
word_reviews = []
all_words = []
for review in reviews_processed:
    review = ViTokenizer.tokenize(review.lower())
    word_reviews.append(review.split())


In [9]:
EMBEDDING_DIM = 400 # how big is each word vector
MAX_VOCAB_SIZE = 10000 # how many unique words to use (i.e num rows in embedding vector)
MAX_SEQUENCE_LENGTH = 300 # max number of words in a comment to use

In [10]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

In [11]:
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE, lower=True, char_level=False)
tokenizer.fit_on_texts(word_reviews)
sequences_train = tokenizer.texts_to_sequences(word_reviews)
word_index = tokenizer.word_index


In [12]:
data = pad_sequences(sequences_train, maxlen=MAX_SEQUENCE_LENGTH)
labels = encoded_labels

In [13]:
print('Shape of X train and X validation tensor:',data.shape)
print('Shape of label train and validation tensor:', labels.shape)

Shape of X train and X validation tensor: (5100, 300)
Shape of label train and validation tensor: (5100, 3)


In [14]:
import gensim
from gensim.models import Word2Vec
from gensim.utils import simple_preprocess

from gensim.models.keyedvectors import KeyedVectors

word_vectors = KeyedVectors.load_word2vec_format('vi-model-CBOW.bin', binary=True)

vocabulary_size=min(len(word_index)+1,MAX_VOCAB_SIZE)
embedding_matrix = np.zeros((vocabulary_size, EMBEDDING_DIM))
for word, i in word_index.items():
    if i>=MAX_VOCAB_SIZE:
        continue
    try:
        embedding_vector = word_vectors[word]
        embedding_matrix[i] = embedding_vector
    except KeyError:
        embedding_matrix[i]=np.random.normal(0,np.sqrt(0.25),EMBEDDING_DIM)

del(word_vectors)

from keras.layers import Embedding
embedding_layer = Embedding(vocabulary_size,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            trainable=True)

## Dữ liệu cho đánh giá

In [15]:
labels_test = data_test.iloc[:, 0].values
reviews_test = data_test.iloc[:, 1].values

In [16]:
encoded_labels_test = []

for label_test in labels_test:
    if label_test == -1:
        encoded_labels_test.append([1,0,0])
    elif label_test == 0:
        encoded_labels_test.append([0,1,0])
    else:
        encoded_labels_test.append([0,0,1])

encoded_labels_test = np.array(encoded_labels_test)

In [18]:
reviews_processed_test = []
unlabeled_processed_test = []
for review_test in reviews_test:
    review_cool_one = ''.join([char for char in review_test if char not in digits])
    reviews_processed_test.append(review_cool_one)

In [19]:
#Use PyVi for Vietnamese word tokenizer
word_reviews_test = []
all_words = []
for review_test in reviews_processed_test:
    review_test = ViTokenizer.tokenize(review_test.lower())
    word_reviews_test.append(review_test.split())

In [20]:
sequences_test = tokenizer.texts_to_sequences(word_reviews_test)
data_test = pad_sequences(sequences_test, maxlen=MAX_SEQUENCE_LENGTH)
labels_test = encoded_labels_test

In [21]:
print('Shape of X train and X validation tensor:',data_test.shape)
print('Shape of label train and validation tensor:', labels_test.shape)

Shape of X train and X validation tensor: (1050, 300)
Shape of label train and validation tensor: (1050, 3)


# Train model

In [22]:
from tensorflow.keras.layers import Dense, Input, GlobalMaxPooling1D, BatchNormalization, SeparableConv1D, Conv1D, MaxPooling1D, Embedding, Input, MaxPool1D, Dense, Embedding, Dropout,concatenate, Reshape, Flatten, Concatenate, Dropout, SpatialDropout1D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Conv1D, GlobalMaxPooling1D, Embedding, Dropout, concatenate, Flatten
from tensorflow.keras.optimizers import SGD

In [23]:
sequence_length = data.shape[1]
filter_sizes = [3,4,5]
num_filters = 100
dropout_rate = 0.6
inputs = Input(shape=(sequence_length,))
embedding = embedding_layer(inputs)
num_classes = 3

In [24]:
print(inputs)

KerasTensor(type_spec=TensorSpec(shape=(None, 300), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'input_1'")


## 1. CNN code gốc

In [128]:
sequence_length = data.shape[1]
filter_sizes = [3,4,5]
num_filters = 100
dropout_rate = 0.7

inputs = Input(shape=(sequence_length,))
embedding = embedding_layer(inputs)
# reshape = Reshape((sequence_length,EMBEDDING_DIM,1))(embedding)

conv_0 = Conv1D(num_filters, filter_sizes[0],activation='relu',kernel_regularizer=regularizers.l2(0.01))(embedding)
conv_1 = Conv1D(num_filters, filter_sizes[1],activation='relu',kernel_regularizer=regularizers.l2(0.01))(embedding)
conv_2 = Conv1D(num_filters, filter_sizes[2],activation='relu',kernel_regularizer=regularizers.l2(0.01))(embedding)
print(conv_1)
maxpool_0 = MaxPooling1D(sequence_length - filter_sizes[0] + 1, strides=1)(conv_0)
maxpool_1 = MaxPooling1D(sequence_length - filter_sizes[1] + 1, strides=1)(conv_1)
maxpool_2 = MaxPooling1D(sequence_length - filter_sizes[2] + 1, strides=1)(conv_2)

merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2], axis=1)
flatten = Flatten()(merged_tensor)
reshape = Reshape((3*num_filters,))(flatten)
dropout = Dropout(dropout_rate)(flatten)
output = Dense(units=3, activation='softmax',kernel_regularizer=regularizers.l2(0.01))(dropout)

model_org = Model(inputs=inputs, outputs=output, name="CNN_Org")
model_infor = {}
model_infor['CNN_Org'] = model_org

model_org.summary()


KerasTensor(type_spec=TensorSpec(shape=(None, 297, 100), dtype=tf.float32, name=None), name='conv1d_143/Relu:0', description="created by layer 'conv1d_143'")
Model: "CNN_Org"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, 300)]                0         []                            
                                                                                                  
 embedding (Embedding)       (None, 300, 400)             3167600   ['input_12[0][0]']            
                                                                                                  
 conv1d_142 (Conv1D)         (None, 298, 100)             120100    ['embedding[11][0]']          
                                                                                                  
 conv1d_143 (Conv1D)         (Non

## 2. LSTM

In [129]:
from tensorflow.keras.layers import LSTM, SpatialDropout1D, Dense, Dropout

embedding_dropout = SpatialDropout1D(0.2)(embedding)
lstm = LSTM(128, return_sequences=False, dropout=dropout_rate, recurrent_dropout=dropout_rate)(embedding_dropout)
dense = Dense(128, activation='relu')(lstm)
drop = Dropout(dropout_rate)(dense)
output = Dense(units=num_classes, activation='softmax')(drop)

model_lstm = Model(inputs, output, name="LSTM_Simple")
model_lstm.summary()
model_infor['LSTM_Simple'] = model_lstm

Model: "LSTM_Simple"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, 300)]             0         
                                                                 
 embedding (Embedding)       (None, 300, 400)          3167600   
                                                                 
 spatial_dropout1d_20 (Spat  (None, 300, 400)          0         
 ialDropout1D)                                                   
                                                                 
 lstm_51 (LSTM)              (None, 128)               270848    
                                                                 
 dense_121 (Dense)           (None, 128)               16512     
                                                                 
 dropout_72 (Dropout)        (None, 128)               0         
                                                       

## 3. Stacked LSTM (2 lớp nối tiếp)

In [130]:
from tensorflow.keras.layers import LSTM, SpatialDropout1D, Dense, Dropout

embedding_dropout = SpatialDropout1D(0.2)(embedding)
lstm_1 = LSTM(256, return_sequences=True, dropout=dropout_rate, recurrent_dropout=dropout_rate)(embedding_dropout)
lstm_2 = LSTM(128, return_sequences=False, dropout=dropout_rate, recurrent_dropout=dropout_rate)(lstm_1)
dense = Dense(128, activation='relu')(lstm_2)
drop = Dropout(dropout_rate)(dense)
output = Dense(units=num_classes, activation='softmax')(drop)

model_stacked_lstm = Model(inputs, output, name="Stacked_LSTM")
model_stacked_lstm.summary()
model_infor['Stacked_LSTM'] = model_stacked_lstm

Model: "Stacked_LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, 300)]             0         
                                                                 
 embedding (Embedding)       (None, 300, 400)          3167600   
                                                                 
 spatial_dropout1d_21 (Spat  (None, 300, 400)          0         
 ialDropout1D)                                                   
                                                                 
 lstm_52 (LSTM)              (None, 300, 256)          672768    
                                                                 
 lstm_53 (LSTM)              (None, 128)               197120    
                                                                 
 dense_123 (Dense)           (None, 128)               16512     
                                                      

## 4. LSTM mắc song song

In [131]:
embedding_dropout = SpatialDropout1D(0.2)(embedding)

# Hai nhánh LSTM song song
lstm1 = LSTM(256, return_sequences=False, dropout=dropout_rate, recurrent_dropout=dropout_rate)(embedding_dropout)
lstm2 = LSTM(128, return_sequences=False, dropout=dropout_rate, recurrent_dropout=dropout_rate)(embedding_dropout)

# Kết hợp đầu ra hai nhánh
merged = concatenate([lstm1, lstm2])

# Các lớp sau
dense = Dense(128, activation='relu')(merged)
drop = Dropout(dropout_rate)(dense)
output = Dense(units=num_classes, activation='softmax')(drop)

# Tạo mô hình
model_parallel_lstm = Model(inputs, output, name="Parallel_LSTM")
model_parallel_lstm.summary()
model_infor['Parallel_LSTM'] = model_parallel_lstm


Model: "Parallel_LSTM"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, 300)]                0         []                            
                                                                                                  
 embedding (Embedding)       (None, 300, 400)             3167600   ['input_12[0][0]']            
                                                                                                  
 spatial_dropout1d_22 (Spat  (None, 300, 400)             0         ['embedding[11][0]']          
 ialDropout1D)                                                                                    
                                                                                                  
 lstm_54 (LSTM)              (None, 256)                  672768    ['spatial_dropout1

## 5. BiLSTM

In [132]:
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout
from tensorflow.keras.models import Model

# Input và Embedding (giữ nguyên)
embedding_dropout = SpatialDropout1D(0.2)(embedding)

# Chỉ dùng 1 lớp BiLSTM thay thế 2 LSTM song song
bilstm = Bidirectional(
    LSTM(
        units=192,  
        return_sequences=False,
        dropout=dropout_rate,
        recurrent_dropout=dropout_rate
    )
)(embedding_dropout)

# Các lớp phía sau (giữ nguyên)
dense = Dense(128, activation='relu')(bilstm)
drop = Dropout(dropout_rate)(dense)
output = Dense(units=num_classes, activation='softmax')(drop)

# Tạo mô hình
model_single_bilstm = Model(inputs, output, name="Single_BiLSTM")
model_single_bilstm.summary()
model_infor['Single_BiLSTM'] = model_single_bilstm

Model: "Single_BiLSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, 300)]             0         
                                                                 
 embedding (Embedding)       (None, 300, 400)          3167600   
                                                                 
 spatial_dropout1d_23 (Spat  (None, 300, 400)          0         
 ialDropout1D)                                                   
                                                                 
 bidirectional_18 (Bidirect  (None, 384)               910848    
 ional)                                                          
                                                                 
 dense_127 (Dense)           (None, 128)               49280     
                                                                 
 dropout_75 (Dropout)        (None, 128)             

## 6. Song song BiLSTM đầu-cuối (Parallel BiLSTM)

In [133]:
from tensorflow.keras.layers import Bidirectional, LSTM, concatenate

embedding_dropout = SpatialDropout1D(0.2)(embedding)

# BiLSTM 1
bilstm1 = Bidirectional(LSTM(128, return_sequences=False, dropout=dropout_rate))(embedding_dropout)

# BiLSTM 2
bilstm2 = Bidirectional(LSTM(64, return_sequences=False, dropout=dropout_rate))(embedding_dropout)

# Kết hợp song song
merged = concatenate([bilstm1, bilstm2])
dense = Dense(128, activation='relu')(merged)
drop = Dropout(dropout_rate)(dense)
output = Dense(units=num_classes, activation='softmax')(drop)

model_parallel_bilstm = Model(inputs, output, name="Parallel_BiLSTM")
model_parallel_bilstm.summary()
model_infor['Parallel_BiLSTM'] = model_parallel_bilstm


Model: "Parallel_BiLSTM"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, 300)]                0         []                            
                                                                                                  
 embedding (Embedding)       (None, 300, 400)             3167600   ['input_12[0][0]']            
                                                                                                  
 spatial_dropout1d_24 (Spat  (None, 300, 400)             0         ['embedding[11][0]']          
 ialDropout1D)                                                                                    
                                                                                                  
 bidirectional_19 (Bidirect  (None, 256)                  541696    ['spatial_dropou

## 7. CNN + LSTM kết hợp (CNN -> LSTM)

In [134]:
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Reshape

# CNN
conv_0 = Conv1D(num_filters, filter_sizes[0],activation='relu',kernel_regularizer=regularizers.l2(0.01))(embedding)
conv_1 = Conv1D(num_filters, filter_sizes[1],activation='relu',kernel_regularizer=regularizers.l2(0.01))(embedding)
conv_2 = Conv1D(num_filters, filter_sizes[2],activation='relu',kernel_regularizer=regularizers.l2(0.01))(embedding)
print(conv_1)
maxpool_0 = MaxPooling1D(sequence_length - filter_sizes[0] + 1, strides=1)(conv_0)
maxpool_1 = MaxPooling1D(sequence_length - filter_sizes[1] + 1, strides=1)(conv_1)
maxpool_2 = MaxPooling1D(sequence_length - filter_sizes[2] + 1, strides=1)(conv_2)

merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2], axis=1)

# LSTM
lstm = LSTM(128, return_sequences=False, dropout=dropout_rate, recurrent_dropout=dropout_rate)(merged_tensor)
lstm2 = LSTM(128, return_sequences=False, dropout=dropout_rate, recurrent_dropout=dropout_rate)(merged_tensor)
merged_tensor = concatenate([lstm, lstm2], axis=1)

# Dense layers
dense = Dense(128, activation='relu')(lstm)
drop = Dropout(dropout_rate)(dense)
output = Dense(units=num_classes, activation='softmax')(drop)

model_cnn_lstm = Model(inputs, output, name="CNN_LSTM")
model_cnn_lstm.summary()
model_infor['CNN_LSTM'] = model_cnn_lstm

KerasTensor(type_spec=TensorSpec(shape=(None, 297, 100), dtype=tf.float32, name=None), name='conv1d_146/Relu:0', description="created by layer 'conv1d_146'")
Model: "CNN_LSTM"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, 300)]                0         []                            
                                                                                                  
 embedding (Embedding)       (None, 300, 400)             3167600   ['input_12[0][0]']            
                                                                                                  
 conv1d_145 (Conv1D)         (None, 298, 100)             120100    ['embedding[11][0]']          
                                                                                                  
 conv1d_146 (Conv1D)         (No

## 8. LSTM + CNN stacked (LSTM trước CNN)

In [None]:
from tensorflow.keras.layers import Input, Embedding, LSTM, Reshape, Conv1D, MaxPooling1D, Dense, Dropout
from tensorflow.keras.models import Model

# LSTM trước
lstm = LSTM(128, return_sequences=True, dropout=dropout_rate, recurrent_dropout=dropout_rate)(embedding)

# CNN sau
conv_0 = Conv1D(num_filters, filter_sizes[0],activation='relu',kernel_regularizer=regularizers.l2(0.01))(lstm)
conv_1 = Conv1D(num_filters, filter_sizes[1],activation='relu',kernel_regularizer=regularizers.l2(0.01))(lstm)
conv_2 = Conv1D(num_filters, filter_sizes[2],activation='relu',kernel_regularizer=regularizers.l2(0.01))(lstm)

print(conv_1)
maxpool_0 = MaxPooling1D(sequence_length - filter_sizes[0] + 1, strides=1)(conv_0)
maxpool_1 = MaxPooling1D(sequence_length - filter_sizes[1] + 1, strides=1)(conv_1)
maxpool_2 = MaxPooling1D(sequence_length - filter_sizes[2] + 1, strides=1)(conv_2)

merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2], axis=1)

flatten = GlobalMaxPooling1D()(merged_tensor)

# Dense layers
dense = Dense(128, activation='relu')(flatten)
drop = Dropout(dropout_rate)(dense)
output = Dense(units=3, activation='softmax')(drop)

# Build model
model_lstm_cnn = Model(inputs, output, name="LSTM_CNN")
model_lstm_cnn.summary()

# Lưu vào dict thông tin
model_infor['LSTM_CNN'] = model_lstm_cnn


KerasTensor(type_spec=TensorSpec(shape=(None, 297, 100), dtype=tf.float32, name=None), name='conv1d_165/Relu:0', description="created by layer 'conv1d_165'")
Model: "LSTM_CNN"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_13 (InputLayer)       [(None, 300)]                0         []                            
                                                                                                  
 embedding (Embedding)       (None, 300, 400)             3167600   ['input_13[0][0]']            
                                                                                                  
 lstm_63 (LSTM)              (None, 300, 128)             270848    ['embedding[12][0]']          
                                                                                                  
 conv1d_164 (Conv1D)         (No

## 9. BiLSTM + CNNs song song

In [135]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, concatenate
from tensorflow.keras.layers import Bidirectional, LSTM, Dropout, Dense, BatchNormalization

# Multi-kernel CNN
conv_0 = Conv1D(filters=128,
                kernel_size=3,
                padding='same',
                activation='relu',
                kernel_regularizer=regularizers.l2(0.01))(embedding)
conv_1 = Conv1D(filters=128, kernel_size=4, padding='same', activation='relu', kernel_regularizer=regularizers.l2(0.01))(embedding)
conv_2 = Conv1D(filters=128, kernel_size=5, padding='same', activation='relu',kernel_regularizer=regularizers.l2(0.01))(embedding)
conv_3 = Conv1D(filters=128, kernel_size=6, padding='same', activation='relu',kernel_regularizer=regularizers.l2(0.01))(embedding)

# MaxPooling
maxpool_0 = GlobalMaxPooling1D()(conv_0)
maxpool_1 = GlobalMaxPooling1D()(conv_1)
maxpool_2 = GlobalMaxPooling1D()(conv_2)
maxpool_3 = GlobalMaxPooling1D()(conv_3)

# BiLSTM
bilstm = Bidirectional(LSTM(128, return_sequences=False))(embedding)

# Kết hợp BiLSTM và CNN
merged = concatenate([bilstm, maxpool_0, maxpool_1, maxpool_2, maxpool_3])

# Thêm BatchNormalization và Dropout
x = BatchNormalization()(merged)
x = Dropout(dropout_rate)(x)

# Dense layer trung gian giúp học tốt hơn
x = Dense(128, activation='relu')(x)
x = Dropout(dropout_rate)(x)

# Output
output = Dense(units=3, activation='softmax')(x)

# Model
model_cnn_bilstm = Model(inputs, output, name="CNN_BiLSTM")
model_cnn_bilstm.summary()
model_infor['CNN_BiLSTM'] = model_cnn_bilstm

Model: "CNN_BiLSTM"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, 300)]                0         []                            
                                                                                                  
 embedding (Embedding)       (None, 300, 400)             3167600   ['input_12[0][0]']            
                                                                                                  
 conv1d_148 (Conv1D)         (None, 300, 128)             153728    ['embedding[11][0]']          
                                                                                                  
 conv1d_149 (Conv1D)         (None, 300, 128)             204928    ['embedding[11][0]']          
                                                                                         

## 10. BiLSTM + CNN chạy tuần tự

In [136]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, concatenate
from tensorflow.keras.layers import Bidirectional, LSTM, Dropout, Dense, BatchNormalization
from tensorflow.keras import regularizers

def build_lstm_cnn_model(inputs,
                         embedding,
                         lstm_units=128,
                         cnn_filters=128,
                         dropout_rate=0.7,
                         num_classes=3):
    """
    Xây dựng mô hình với kiến trúc Embedding -> BiLSTM -> CNN.

    Args:

    Returns:
        tensorflow.keras.models.Model: Mô hình Keras.
    """


    # BiLSTM
    bilstm = Bidirectional(LSTM(lstm_units, return_sequences=True))(embedding) # return_sequences=True cho CNN sau đó

    # Multi-kernel CNN
    conv_0 = Conv1D(filters=cnn_filters,
                    kernel_size=3,
                    padding='same',
                    activation='relu',
                    kernel_regularizer=regularizers.l2(0.01))(bilstm)
    conv_1 = Conv1D(filters=cnn_filters,
                    kernel_size=4, padding='same',
                    activation='relu',
                    kernel_regularizer=regularizers.l2(0.01))(bilstm)
    conv_2 = Conv1D(filters=cnn_filters, kernel_size=5,
                    padding='same',
                    activation='relu',
                    kernel_regularizer=regularizers.l2(0.01))(bilstm)

    # MaxPooling
    maxpool_0 = GlobalMaxPooling1D()(conv_0)
    maxpool_1 = GlobalMaxPooling1D()(conv_1)
    maxpool_2 = GlobalMaxPooling1D()(conv_2)

    # Kết hợp BiLSTM và CNN
    merged = concatenate([maxpool_0, maxpool_1, maxpool_2]) # BiLSTM đã được xử lý bởi CNN

    # Thêm BatchNormalization và Dropout
    x = BatchNormalization()(merged)
    x = Dropout(dropout_rate)(x)

    x = Dense(lstm_units, activation='relu')(x) # Giữ số units tương đương LSTM
    x = Dropout(dropout_rate)(x)

    # Output
    output = Dense(units=num_classes, activation='softmax')(x)

    # Model
    model_lstm_cnn = Model(inputs, output, name="Stacked_BiLSTN_CNN")
    return model_lstm_cnn

# Tạo và tóm tắt mô hình
model_lstm_cnn = build_lstm_cnn_model(
    inputs,
    embedding,
    dropout_rate=dropout_rate,
    num_classes=3 # Thay đổi nếu số lượng lớp của bạn khác
)
model_lstm_cnn.summary()

# Lưu thông tin mô hình (nếu bạn đang sử dụng dictionary model_infor)
if 'model_infor' in locals():
    model_infor['Stacked_BiLSTN_CNN'] = model_lstm_cnn

Model: "Stacked_BiLSTN_CNN"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, 300)]                0         []                            
                                                                                                  
 embedding (Embedding)       (None, 300, 400)             3167600   ['input_12[0][0]']            
                                                                                                  
 bidirectional_22 (Bidirect  (None, 300, 256)             541696    ['embedding[11][0]']          
 ional)                                                                                           
                                                                                                  
 conv1d_152 (Conv1D)         (None, 300, 128)             98432     ['bidirection

## 11. LSTM + CNN + LSTM (theo thứ tự LSTM → CNN → LSTM)

In [137]:
# LSTM đầu tiên
lstm1 = LSTM(128, return_sequences=True, name='lstm1')(embedding)

# CNN kế tiếp
conv = Conv1D(filters=num_filters, kernel_size=5, activation='relu', padding='same', name='conv1d')(lstm1)
pool = MaxPooling1D(pool_size=2, name='maxpool')(conv)

# LSTM thứ hai
lstm2 = LSTM(128, return_sequences=False, name='lstm2')(pool)

# Dense + Dropout
dense = Dense(128, activation='relu', name='dense')(lstm2)
drop = Dropout(0.7, name='dropout')(dense)

# Output layer
output = Dense(units=num_classes, activation='softmax', name='output')(drop)

# Model
model_rnn_cnn_lstm = Model(inputs, output, name="LSTM_CNN_LSTM")
model_rnn_cnn_lstm.summary()

# Lưu vào dict
model_infor['LSTM_CNN_LSTM'] = model_rnn_cnn_lstm

Model: "LSTM_CNN_LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_12 (InputLayer)       [(None, 300)]             0         
                                                                 
 embedding (Embedding)       (None, 300, 400)          3167600   
                                                                 
 lstm1 (LSTM)                (None, 300, 128)          270848    
                                                                 
 conv1d (Conv1D)             (None, 300, 100)          64100     
                                                                 
 maxpool (MaxPooling1D)      (None, 150, 100)          0         
                                                                 
 lstm2 (LSTM)                (None, 128)               117248    
                                                                 
 dense (Dense)               (None, 128)             

## 12. GRU + CNN liên tiếp

In [138]:
# CNN trước GRU
conv_0 = Conv1D(num_filters, filter_sizes[0],activation='relu',kernel_regularizer=regularizers.l2(0.01))(embedding)
conv_1 = Conv1D(num_filters, filter_sizes[1],activation='relu',kernel_regularizer=regularizers.l2(0.01))(embedding)
conv_2 = Conv1D(num_filters, filter_sizes[2],activation='relu',kernel_regularizer=regularizers.l2(0.01))(embedding)
print(conv_1)
maxpool_0 = MaxPooling1D(sequence_length - filter_sizes[0] + 1, strides=1)(conv_0)
maxpool_1 = MaxPooling1D(sequence_length - filter_sizes[1] + 1, strides=1)(conv_1)
maxpool_2 = MaxPooling1D(sequence_length - filter_sizes[2] + 1, strides=1)(conv_2)

merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2], axis=1)

# GRU sau CNN
gru = GRU(128, return_sequences=True, dropout=dropout_rate, recurrent_dropout=dropout_rate)(merged_tensor)

# GlobalMaxPooling1D
flatten = GlobalMaxPooling1D()(gru)

# Dense layers
dense = Dense(128, activation='relu')(flatten)
drop = Dropout(dropout_rate)(dense)
output = Dense(units=3, activation='softmax')(drop)

# Build model
model_gru_cnn = Model(inputs, output, name="CNN_GRU")
model_gru_cnn.summary()

# Lưu vào dict thông tin
model_infor['CNN_GRU'] = model_gru_cnn

KerasTensor(type_spec=TensorSpec(shape=(None, 297, 100), dtype=tf.float32, name=None), name='conv1d_156/Relu:0', description="created by layer 'conv1d_156'")
Model: "CNN_GRU"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, 300)]                0         []                            
                                                                                                  
 embedding (Embedding)       (None, 300, 400)             3167600   ['input_12[0][0]']            
                                                                                                  
 conv1d_155 (Conv1D)         (None, 298, 100)             120100    ['embedding[11][0]']          
                                                                                                  
 conv1d_156 (Conv1D)         (Non

## 13. CNN + GRU liên tiếp

In [139]:
# GRU sau CNN
gru = GRU(128, return_sequences=True, dropout=dropout_rate, recurrent_dropout=dropout_rate)(embedding)

# GlobalMaxPooling1D
flatten = GlobalMaxPooling1D()(gru)
conv_0 = Conv1D(num_filters, filter_sizes[0],activation='relu',kernel_regularizer=regularizers.l2(0.01))(gru)
conv_1 = Conv1D(num_filters, filter_sizes[1],activation='relu',kernel_regularizer=regularizers.l2(0.01))(gru)
conv_2 = Conv1D(num_filters, filter_sizes[2],activation='relu',kernel_regularizer=regularizers.l2(0.01))(gru)
print(conv_1)
maxpool_0 = MaxPooling1D(sequence_length - filter_sizes[0] + 1, strides=1)(conv_0)
maxpool_1 = MaxPooling1D(sequence_length - filter_sizes[1] + 1, strides=1)(conv_1)
maxpool_2 = MaxPooling1D(sequence_length - filter_sizes[2] + 1, strides=1)(conv_2)

merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2], axis=1)

# GlobalMaxPooling1D
flatten = GlobalMaxPooling1D()(merged_tensor)

# Dense layers
dense = Dense(128, activation='relu')(flatten)
drop = Dropout(dropout_rate)(dense)
output = Dense(units=3, activation='softmax')(drop)

# Build model
model_gru_cnn = Model(inputs, output, name="GRU_CNN")
model_gru_cnn.summary()

# Lưu vào dict thông tin
model_infor['GRU_CNN'] = model_gru_cnn

KerasTensor(type_spec=TensorSpec(shape=(None, 297, 100), dtype=tf.float32, name=None), name='conv1d_159/Relu:0', description="created by layer 'conv1d_159'")
Model: "GRU_CNN"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, 300)]                0         []                            
                                                                                                  
 embedding (Embedding)       (None, 300, 400)             3167600   ['input_12[0][0]']            
                                                                                                  
 gru_30 (GRU)                (None, 300, 128)             203520    ['embedding[11][0]']          
                                                                                                  
 conv1d_158 (Conv1D)         (Non

## 14. GRU và CNN song song

In [140]:
from tensorflow.keras.layers import Input, Embedding, Conv1D, MaxPooling1D, GRU, Dense, Dropout, Flatten, concatenate, Reshape
from tensorflow.keras import Model, regularizers

sequence_length = data.shape[1]
filter_sizes = [3, 4, 5]
num_filters = 100
dropout_rate = 0.7

# Input & Embedding
inputs = Input(shape=(sequence_length,))
embedding = embedding_layer(inputs)  # embedding_layer phải được định nghĩa trước

# --- CNN Branch ---
conv_0 = Conv1D(num_filters, filter_sizes[0], activation='relu',
                kernel_regularizer=regularizers.l2(0.01))(embedding)
conv_1 = Conv1D(num_filters, filter_sizes[1], activation='relu',
                kernel_regularizer=regularizers.l2(0.01))(embedding)
conv_2 = Conv1D(num_filters, filter_sizes[2], activation='relu',
                kernel_regularizer=regularizers.l2(0.01))(embedding)

maxpool_0 = MaxPooling1D(sequence_length - filter_sizes[0] + 1, strides=1)(conv_0)
maxpool_1 = MaxPooling1D(sequence_length - filter_sizes[1] + 1, strides=1)(conv_1)
maxpool_2 = MaxPooling1D(sequence_length - filter_sizes[2] + 1, strides=1)(conv_2)

cnn_merged = concatenate([maxpool_0, maxpool_1, maxpool_2], axis=1)
cnn_flatten = Flatten()(cnn_merged)

# --- GRU Branch ---
gru = GRU(128, return_sequences=False, kernel_regularizer=regularizers.l2(0.01))(embedding)

# --- Combine CNN + GRU ---
merged = concatenate([cnn_flatten, gru])

# --- Output ---
dropout = Dropout(dropout_rate)(merged)
output = Dense(units=3, activation='softmax',
               kernel_regularizer=regularizers.l2(0.01))(dropout)

model_joint = Model(inputs=inputs, outputs=output, name="CNN_GRU_Parallel")
model_infor['CNN_GRU_Parallel'] = model_joint

model_joint.summary()


Model: "CNN_GRU_Parallel"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_13 (InputLayer)       [(None, 300)]                0         []                            
                                                                                                  
 embedding (Embedding)       (None, 300, 400)             3167600   ['input_13[0][0]']            
                                                                                                  
 conv1d_161 (Conv1D)         (None, 298, 100)             120100    ['embedding[12][0]']          
                                                                                                  
 conv1d_162 (Conv1D)         (None, 297, 100)             160100    ['embedding[12][0]']          
                                                                                   

## 15. BERT-based Approach

### Chuẩn bị thư viện

In [398]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel
import pandas as pd
from sklearn.metrics import classification_report
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


### 15.a BERT-BASE

In [56]:
import os
import argparse
import pandas as pd
from datasets import Dataset
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
)

# --------- Cấu hình BERT đa ngôn ngữ ---------
MODEL_NAME = "bert-base-multilingual-cased"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# --------- Load và xử lý dữ liệu ---------
def load_data(file_path):
    df = pd.read_csv(file_path, sep='\t')
    df.columns = ['Class', 'Data']
    df = df[['Class', 'Data']].dropna()

    df = df.rename(columns={'Class': 'label', 'Data': 'text'})

    # Map nhãn: -1 → 0, 0 → 1, 1 → 2
    df['label'] = df['label'].map({-1: 0, 0: 1, 1: 2})
    return Dataset.from_pandas(df)

train_dataset = load_data("vlsp_sentiment_train.csv")
test_dataset = load_data("vlsp_sentiment_test.csv")

# --------- Token hóa ---------
def tokenize_fn(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=256)

train_dataset = train_dataset.map(tokenize_fn, batched=True)
test_dataset = test_dataset.map(tokenize_fn, batched=True)

train_dataset = train_dataset.rename_column("label", "labels")
test_dataset = test_dataset.rename_column("label", "labels")

train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
test_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

# --------- Load mô hình BERT ---------
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)

# --------- Tham số dòng lệnh ---------
def get_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("--lr", type=float, default=2e-5)
    parser.add_argument("--bs", type=int, default=16)
    parser.add_argument("--epochs", type=int, default=5)
    return parser.parse_args(args=[])

args_cli = get_arguments()

# --------- TrainingArguments ---------
training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=args_cli.lr,
    per_device_train_batch_size=args_cli.bs,
    per_device_eval_batch_size=args_cli.bs,
    num_train_epochs=args_cli.epochs,
    weight_decay=0.01,
    logging_steps=50,
    logging_dir='./logs',
    do_eval=True,
    save_total_limit=1
)

# --------- Metrics ---------
def compute_metrics(p):
    preds = p.predictions.argmax(-1)
    labels = p.label_ids
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

# --------- Trainer ---------
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# --------- Huấn luyện ---------
trainer.train()

# --------- Đánh giá ---------
predictions = trainer.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = predictions.label_ids

print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=["-1", "0", "1"]))

accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


Map: 100%|██████████| 5100/5100 [00:00<00:00, 20140.51 examples/s]
Map: 100%|██████████| 1050/1050 [00:00<00:00, 19979.04 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
  arr = np.array(obj)


Step,Training Loss
50,1.1163
100,1.0818
150,1.0185
200,0.9666
250,0.921
300,0.868
350,0.7707
400,0.7788
450,0.7481
500,0.746


  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)


Classification Report:
              precision    recall  f1-score   support

          -1       0.67      0.73      0.70       350
           0       0.65      0.65      0.65       350
           1       0.79      0.72      0.75       350

    accuracy                           0.70      1050
   macro avg       0.70      0.70      0.70      1050
weighted avg       0.70      0.70      0.70      1050

Accuracy: 69.90%


### 15.b phobert-base

In [None]:
import os
import argparse
import gc
import torch
import pandas as pd
from datasets import Dataset
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback
)
from pyvi import ViTokenizer

# --------- Cấu hình PhoBERT ---------
MODEL_NAME = "vinai/phobert-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)

# --------- Load và xử lý dữ liệu ---------
def load_data(file_path):
    df = pd.read_csv(file_path, sep='\t')
    df.columns = ['Class', 'Data']
    df = df[['Class', 'Data']].dropna()

    # Tách từ bằng PyVi
    df['Data'] = df['Data'].apply(lambda x: ViTokenizer.tokenize(str(x)))

    df = df.rename(columns={'Class': 'label', 'Data': 'text'})

    # Map nhãn: -1 → 0, 0 → 1, 1 → 2
    df['label'] = df['label'].map({-1: 0, 0: 1, 1: 2})
    return Dataset.from_pandas(df)

train_dataset = load_data("vlsp_sentiment_train.csv")
test_dataset = load_data("vlsp_sentiment_test.csv")

# --------- Token hóa ---------
def tokenize_fn(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=256)

train_dataset = train_dataset.map(tokenize_fn, batched=True)
test_dataset = test_dataset.map(tokenize_fn, batched=True)

train_dataset = train_dataset.rename_column("label", "labels")
test_dataset = test_dataset.rename_column("label", "labels")

train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
test_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

# --------- Load mô hình PhoBERT ---------
model_phobert_base = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)

# --------- Tham số dòng lệnh ---------
def get_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("--lr", type=float, default=2e-5)
    parser.add_argument("--bs", type=int, default=16)
    parser.add_argument("--epochs", type=int, default=10)
    return parser.parse_args(args=[])

args_cli = get_arguments()

# --------- TrainingArguments ---------
training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=args_cli.lr,
    per_device_train_batch_size=args_cli.bs,
    per_device_eval_batch_size=args_cli.bs,
    num_train_epochs=args_cli.epochs,
    weight_decay=0.01,
    logging_steps=50,
    logging_dir='./logs',
    do_eval=True,
    save_total_limit=1
)

# --------- Hàm tính metrics ---------
def compute_metrics(p):
    preds = p.predictions.argmax(-1)
    labels = p.label_ids
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

# --------- Trainer ---------
trainer_phobert_base = Trainer(
    model=model_phobert_base,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# --------- Huấn luyện ---------
trainer_phobert_base.train()

# --------- Đánh giá và in kết quả ---------
predictions = trainer_phobert_base.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = predictions.label_ids

print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=["-1", "0", "1"]))

accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


Map: 100%|██████████| 5100/5100 [00:00<00:00, 8523.75 examples/s]
Map: 100%|██████████| 1050/1050 [00:00<00:00, 8533.60 examples/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/phobert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
  arr = np.array(obj)


Step,Training Loss
50,1.0774
100,0.9814
150,0.8039
200,0.7523
250,0.7547
300,0.6758
350,0.6194
400,0.6103
450,0.5826
500,0.5644


  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)


Classification Report:
              precision    recall  f1-score   support

          -1       0.79      0.76      0.77       350
           0       0.70      0.69      0.69       350
           1       0.80      0.85      0.82       350

    accuracy                           0.76      1050
   macro avg       0.76      0.76      0.76      1050
weighted avg       0.76      0.76      0.76      1050

Accuracy: 76.29%


### 15.b phobert-large

In [4]:
import os
import argparse
import gc
import torch
import pandas as pd
from datasets import Dataset
from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback
)
from pyvi import ViTokenizer

# --------- Cấu hình PhoBERT ---------
MODEL_NAME = "vinai/phobert-large"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)

# --------- Load và xử lý dữ liệu ---------
def load_data(file_path):
    df = pd.read_csv(file_path, sep='\t')
    df.columns = ['Class', 'Data']
    df = df[['Class', 'Data']].dropna()

    # Tách từ bằng PyVi
    df['Data'] = df['Data'].apply(lambda x: ViTokenizer.tokenize(str(x)))

    df = df.rename(columns={'Class': 'label', 'Data': 'text'})

    # Map nhãn: -1 → 0, 0 → 1, 1 → 2
    df['label'] = df['label'].map({-1: 0, 0: 1, 1: 2})
    return Dataset.from_pandas(df)

train_dataset = load_data("vlsp_sentiment_train.csv")
test_dataset = load_data("vlsp_sentiment_test.csv")

# --------- Token hóa ---------
def tokenize_fn(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=256)

train_dataset = train_dataset.map(tokenize_fn, batched=True)
test_dataset = test_dataset.map(tokenize_fn, batched=True)

train_dataset = train_dataset.rename_column("label", "labels")
test_dataset = test_dataset.rename_column("label", "labels")

train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
test_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

# --------- Thiết bị (device) ---------
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(">>> Sử dụng thiết bị:", device)

# --------- Load mô hình PhoBERT ---------
model_phobert_base = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)
model_phobert_base.to(device)

# --------- Tham số dòng lệnh ---------
def get_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("--lr", type=float, default=2e-5)
    parser.add_argument("--bs", type=int, default=4)   # Giảm batch size để tránh OOM
    parser.add_argument("--epochs", type=int, default=10)
    return parser.parse_args(args=[])

args_cli = get_arguments()

# --------- TrainingArguments ---------
training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=args_cli.lr,
    per_device_train_batch_size=args_cli.bs,
    per_device_eval_batch_size=args_cli.bs,
    num_train_epochs=args_cli.epochs,
    weight_decay=0.01,
    logging_steps=50,
    logging_dir='./logs',
    do_eval=True,
    save_total_limit=1
)

# --------- Hàm tính metrics ---------
def compute_metrics(p):
    preds = p.predictions.argmax(-1)
    labels = p.label_ids
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    acc = accuracy_score(labels, preds)
    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

# --------- Trainer ---------
trainer_phobert_base = Trainer(
    model=model_phobert_base,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# --------- Huấn luyện ---------
torch.mps.empty_cache()  # Dọn bộ nhớ trước khi train
trainer_phobert_base.train()

# --------- Đánh giá và in kết quả ---------
torch.mps.empty_cache()
predictions = trainer_phobert_base.predict(test_dataset)
y_pred = predictions.predictions.argmax(-1)
y_true = predictions.label_ids

print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=["-1", "0", "1"]))

accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


Map: 100%|██████████| 5100/5100 [00:00<00:00, 7085.35 examples/s]
Map: 100%|██████████| 1050/1050 [00:00<00:00, 7986.54 examples/s]


>>> Sử dụng thiết bị: mps


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/phobert-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer_phobert_base = Trainer(
  arr = np.array(obj)


Step,Training Loss
50,1.0995
100,1.0823
150,1.0831
200,0.9605
250,0.9718
300,1.0015
350,0.9915
400,0.8424
450,0.8197
500,0.966


  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)
  arr = np.array(obj)


Classification Report:
              precision    recall  f1-score   support

          -1       0.79      0.76      0.77       350
           0       0.69      0.70      0.70       350
           1       0.82      0.84      0.83       350

    accuracy                           0.77      1050
   macro avg       0.77      0.77      0.77      1050
weighted avg       0.77      0.77      0.77      1050

Accuracy: 76.57%


# Training model

In [46]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

In [47]:
import tensorflow as tf
tf.config.run_functions_eagerly(True)


In [144]:
for name, model in model_infor.items():
    print(f"Training model '{name}'...")

Training model 'CNN_Org'...
Training model 'LSTM_Simple'...
Training model 'Stacked_LSTM'...
Training model 'Parallel_LSTM'...
Training model 'Single_BiLSTM'...
Training model 'Parallel_BiLSTM'...
Training model 'CNN_LSTM'...
Training model 'CNN_BiLSTM'...
Training model 'Stacked_BiLSTN_CNN'...
Training model 'LSTM_CNN_LSTM'...
Training model 'CNN_GRU'...
Training model 'GRU_CNN'...
Training model 'CNN_GRU_Parallel'...
Training model 'LSTM_CNN'...


In [149]:
from keras.optimizers import Adam
from keras.metrics import Precision, Recall, AUC
import keras.backend as K

# Custom F1 score
def f1_score(y_true, y_pred):
    precision = K.cast(Precision()(y_true, y_pred), K.floatx())
    recall = K.cast(Recall()(y_true, y_pred), K.floatx())
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))

def compile_model(model):
    adam = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=adam,
        metrics=[
            'accuracy',
            Precision(name='precision'),
            Recall(name='recall'),
            AUC(name='auc'),
            f1_score
        ]
    )


In [150]:
def train_model(model):
  early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.01, patience=4, verbose=1)
  callbacks_list = [early_stopping]
  history = model.fit(data, labels, validation_split=0.2,
          epochs=10, batch_size=256, callbacks=callbacks_list, shuffle=True)
  return history

In [153]:
# Compile models
for name, model in model_infor.items():
    compile_model(model)

# Train models
history = {}
for name, model in model_infor.items():
    # Define callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
    callbacks_list = [early_stopping]
    print(f"Training model '{name}'...")
    history[name] = train_model(model)



Training model 'CNN_Org'...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training model 'LSTM_Simple'...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 8: early stopping
Training model 'Stacked_LSTM'...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 9: early stopping
Training model 'Parallel_LSTM'...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 6: early stopping
Training model 'Single_BiLSTM'...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 5: early stopping
Training model 'Parallel_BiLSTM'...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 6: early stopping
Training model 'CNN_LSTM'...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 7: early stopping
Training model 'CNN_BiLSTM'...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoc

# Đánh giá

In [58]:
print('Shape of X train and X validation tensor:',data_test.shape)
print('Shape of label train and validation tensor:', labels_test.shape)

Shape of X train and X validation tensor: (1050, 300)
Shape of label train and validation tensor: (1050, 3)


In [148]:
import matplotlib.pyplot as plt
import numpy as np

def compare_models_performance(model_dict, X_test, y_test):
    # 3. Đánh giá test set
    test_metrics = {}

    print("\nMODEL TEST PERFORMANCE")
    print("=" * 50)
    for name, model in model_dict.items():
        print(f"========{name:20s} ========")
        score = model.evaluate(X_test, y_test)
        print("%s: %.2f%%" % (model.metrics_names[0], score[0]*100))
        print("%s: %.2f%%" % (model.metrics_names[1], score[1]*100))

test_accuracies = compare_models_performance(model_infor, data_test, labels_test)


MODEL TEST PERFORMANCE
loss: 239.13%
accuracy: 64.76%
loss: 113.53%
accuracy: 60.86%
loss: 102.43%
accuracy: 54.57%
loss: 140.33%
accuracy: 62.10%
loss: 121.73%
accuracy: 65.24%
loss: 145.41%
accuracy: 60.76%
loss: 171.43%
accuracy: 64.10%
loss: 457.68%
accuracy: 65.62%
loss: 317.91%
accuracy: 64.29%
loss: 219.65%
accuracy: 62.67%
loss: 139.68%
accuracy: 66.48%
loss: 161.33%
accuracy: 63.62%
loss: 260.43%
accuracy: 64.10%
loss: 143.70%
accuracy: 62.76%
