In [1]:
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dropout, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# Base model: Embedding -> Bidirectional LSTM -> Dropout -> softmax classifier.

# 1) Hyperparameters
# NOTE: `vocab_size` and `num_classes` are rebound later by the data-loading
# cell (return values of `load_nusax_data`); the layers built below keep the
# values assigned here.
SEED           = 42      # seed shared by the Python, NumPy and TensorFlow RNGs
vocab_size     = 20000   # size of your word index
embedding_dim  = 128     # dimensionality of the embedding vectors
lstm_units     = 64      # number of LSTM units
dropout_rate   = 0.5     # dropout probability
num_classes    = 3       # number of target classes

# Seed every RNG before any layer is created so that weight initialisation
# and training are repeatable after a kernel restart.
set_random_seed(SEED)

# 2) Build the model
model = Sequential([
    # Maps each token id to a dense vector of size `embedding_dim`.
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    # Reads the sequence in both directions with `lstm_units` units each way.
    Bidirectional(LSTM(units=lstm_units)),
    Dropout(rate=dropout_rate),
    # One softmax probability per class.
    Dense(units=num_classes, activation='softmax')
])

# 3) Compile
# The sparse loss takes integer class ids as targets (no one-hot encoding).
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)




In [2]:
# Data
from utils import load_nusax_data
# Arguments forwarded to the loader.
# presumably the vocabulary cap and the fixed sequence length — confirm in utils.load_nusax_data
MAX_TOKENS = 20000
SEQ_LENGTH = 100

# NOTE(review): `vocab_size` and `num_classes` returned here replace the values
# that were assigned when `model` was built — confirm the two sets agree.
(
    tok_train, y_train,
    tok_val, y_val,
    tok_test, y_test,
    vocab_size, num_classes,
    vectorizer,
) = load_nusax_data(MAX_TOKENS, SEQ_LENGTH)

In [3]:
# Training configuration.
BATCH_SIZE = 50   # samples per gradient update
EPOCHS = 15       # passes over the training split

# Fit on the training split and evaluate on the validation split after every
# epoch; verbose=2 logs one line per epoch.
history = model.fit(
    x=tok_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(tok_val, y_val),
    verbose=2,
)



Epoch 1/15
10/10 - 3s - 297ms/step - accuracy: 0.3720 - loss: 1.0865 - val_accuracy: 0.5900 - val_loss: 1.0684
Epoch 2/15
10/10 - 1s - 50ms/step - accuracy: 0.5040 - loss: 1.0313 - val_accuracy: 0.4100 - val_loss: 1.0292
Epoch 3/15
10/10 - 0s - 49ms/step - accuracy: 0.5700 - loss: 0.9469 - val_accuracy: 0.5300 - val_loss: 0.9470
Epoch 4/15
10/10 - 0s - 44ms/step - accuracy: 0.6880 - loss: 0.7899 - val_accuracy: 0.5700 - val_loss: 0.8780
Epoch 5/15
10/10 - 0s - 44ms/step - accuracy: 0.7720 - loss: 0.6371 - val_accuracy: 0.6800 - val_loss: 0.7534
Epoch 6/15
10/10 - 0s - 45ms/step - accuracy: 0.8880 - loss: 0.4126 - val_accuracy: 0.6900 - val_loss: 0.7122
Epoch 7/15
10/10 - 0s - 45ms/step - accuracy: 0.9160 - loss: 0.2948 - val_accuracy: 0.7400 - val_loss: 0.6695
Epoch 8/15
10/10 - 0s - 46ms/step - accuracy: 0.9660 - loss: 0.1707 - val_accuracy: 0.7800 - val_loss: 0.6125
Epoch 9/15
10/10 - 0s - 45ms/step - accuracy: 0.9360 - loss: 0.1743 - val_accuracy: 0.7100 - val_loss: 0.7251
Epoch 10/

In [4]:
# 4) (Optional) Print a summary of the model defined and trained above.
model.summary()

In [5]:
# Persist the Keras model's weights; the same file is read back by the
# load_weights cells further down.
model.save_weights(filepath="weightbruh.weights.h5")

In [4]:
# Restore the weights written by the save_weights cell into the Keras model
# (lets a fresh kernel skip the training cell).
model.load_weights(filepath="weightbruh.weights.h5")

In [None]:
from model import ManualLSTMModel

# Instantiate ManualLSTMModel (from the local `model` module) with the same
# hyperparameter names used for the Keras model.
# NOTE(review): vocab_size / num_classes are the values returned by
# load_nusax_data at this point, not necessarily the ones `model` was built
# with — confirm they agree.
manual = ManualLSTMModel(
    vocab_size,
    embedding_dim,
    lstm_units,
    dropout_rate,
    num_classes,
)


[-3.4774366e-06 -9.2663961e-03 -3.8522722e-03  7.0058862e-03]


In [7]:
# Load the weights file written by model.save_weights into the manual model,
# presumably so both implementations run with the same parameters.
manual.load_weights("weightbruh.weights.h5")

Testing area: compare the Keras model with the manual implementation on a single sample.

In [8]:
# Convert one sample review into token ids using the vectorizer returned by
# load_nusax_data.
Token = vectorizer(
    "Kangkungnya lumayan tapi kepiting saus padangnya mengecewakan kami dikasih kepiting yang kopong akhir kami tidak makan keptingnya dan dikembalikan."
)

In [9]:
import tensorflow as tf
# Add a leading batch axis, cast to int32 and convert to a NumPy array so the
# same input can be fed to both the Keras and the manual model.
Token_casted = tf.cast(tf.expand_dims(Token, axis=0), tf.int32).numpy()

In [10]:
# Run the Keras model on the single-sample batch.
result = model.predict(x=Token_casted)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step


In [11]:
# Display the Keras model's softmax output for the sample.
result

array([[0.94537485, 0.02387432, 0.03075085]], dtype=float32)

In [12]:
# Run the manual model's forward pass on the same token batch that was given
# to model.predict.
manual_result = manual.forward(Token_casted)

In [13]:
# Display the manual model's output for comparison with `result`.
manual_result

array([[0.94537507, 0.02387421, 0.03075072]])

In [None]:
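# Example prediction vectors noted per sentiment label. The dominant entry
# suggests the column order is (negative, neutral, positive) — TODO confirm.
# Positive: array([[0.00261186, 0.00146393, 0.9959241 ]], dtype=float32)
# Neutral:  array([[0.02828578, 0.96762127, 0.00409289]], dtype=float32)
# Negative: array([[0.9517922 , 0.04311086, 0.00509697]], dtype=float32)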

In [21]:
# Embedding-only model: reuses the first layer of `model` (the Embedding
# layer) so its output can be inspected in isolation.
tmodel = Sequential()
tmodel.add(model.layers[0])

# Compile with the same settings as the full model.
tmodel.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [23]:
# Run only the embedding layer on the single-sample batch.
tresult = tmodel.predict(x=Token_casted)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


In [24]:
# Inspect the output shape of the embedding-only model.
tresult.shape

(1, 100, 128)