In [1]:
import pickle

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping


In [2]:
# Restore the pickled tokenizer object (presumably produced by the
# preprocessing step -- confirm). No other cell shown here reads `tokenizer`.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts that were produced by a trusted pipeline.
tokenizer_path = "../models/tokenizer.pkl"
with open(tokenizer_path, "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)


In [3]:
# Hyperparameters consumed by the model-definition cell.

# Input / embedding geometry
MAX_LEN = 300          # length of each input sequence (Input shape)
VOCAB_SIZE = 20_000    # rows in the embedding table (Embedding input_dim)
EMBEDDING_DIM = 128    # width of each embedding vector (Embedding output_dim)

# Recurrent encoder
LSTM_UNITS = 128       # units of the LSTM wrapped by Bidirectional

In [None]:
# Binary text classifier: token ids -> embeddings -> BiLSTM -> dropout -> sigmoid.
# Layers are created first (in the same order as they are wired) and then
# connected, so the graph can be read top to bottom in the second half.

# Entry point: one fixed-length sequence of MAX_LEN values per sample.
input_text = Input(shape=(MAX_LEN,), name="text_input")

# --- Layer objects -----------------------------------------------------------
embedding_layer = Embedding(
    input_dim=VOCAB_SIZE,
    output_dim=EMBEDDING_DIM,
    name="embedding_layer",
)

# return_sequences=False: only one vector per sample leaves the recurrent
# layer. The name "bilstm_layer" is looked up again by the feature-extractor cell.
bilstm_layer = Bidirectional(
    LSTM(LSTM_UNITS, return_sequences=False),
    name="bilstm_layer",
)

dropout_layer = Dropout(0.5, name="dropout_layer")

# Single sigmoid unit, matching the binary_crossentropy loss used at compile time.
output_layer = Dense(1, activation="sigmoid", name="output_layer")

# --- Wiring ------------------------------------------------------------------
embedding = embedding_layer(input_text)
bilstm = bilstm_layer(embedding)
dropout = dropout_layer(bilstm)
output = output_layer(dropout)

bilstm_model = Model(inputs=input_text, outputs=output)



In [6]:
# Training configuration for the single-sigmoid-output classifier.
compile_options = {
    "optimizer": "adam",
    "loss": "binary_crossentropy",
    "metrics": ["accuracy"],
}
bilstm_model.compile(**compile_options)

# Show the layer-by-layer overview of the compiled model.
bilstm_model.summary()


In [10]:
# Load the pre-split feature/label arrays saved as .npy files.
# (numpy is imported once, in the import cell at the top of the notebook.)

# Single place to change if the processed data moves.
PROCESSED_DIR = "../data/processed"

X_train = np.load(f"{PROCESSED_DIR}/X_train.npy")
X_val   = np.load(f"{PROCESSED_DIR}/X_val.npy")
X_test  = np.load(f"{PROCESSED_DIR}/X_test.npy")

y_train = np.load(f"{PROCESSED_DIR}/y_train.npy")
y_val   = np.load(f"{PROCESSED_DIR}/y_val.npy")
y_test  = np.load(f"{PROCESSED_DIR}/y_test.npy")

# Fail fast, with a readable message, if a feature array and its label array
# were saved with different numbers of rows.
assert len(X_train) == len(y_train), "X_train / y_train row counts differ"
assert len(X_val) == len(y_val), "X_val / y_val row counts differ"
assert len(X_test) == len(y_test), "X_test / y_test row counts differ"

print(X_train.shape, y_train.shape)


(2100, 300) (2100,)


In [17]:
# Training-loop settings.
EPOCHS = 10        # upper bound; early stopping may end training sooner
BATCH_SIZE = 32

# Watch validation loss with a patience of 3 epochs and restore the
# best-scoring weights when training is stopped.
early_stop = EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)

# Fit on the training split while tracking the validation split each epoch.
history = bilstm_model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[early_stop],
    validation_data=(X_val, y_val),
)

print("Model Trained Successfully!")


Epoch 1/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 188ms/step - accuracy: 1.0000 - loss: 2.5783e-04 - val_accuracy: 1.0000 - val_loss: 1.2230e-04
Epoch 2/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 180ms/step - accuracy: 1.0000 - loss: 2.5627e-04 - val_accuracy: 1.0000 - val_loss: 1.0162e-04
Epoch 3/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 180ms/step - accuracy: 1.0000 - loss: 1.8192e-04 - val_accuracy: 1.0000 - val_loss: 8.6726e-05
Epoch 4/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 181ms/step - accuracy: 1.0000 - loss: 1.5778e-04 - val_accuracy: 1.0000 - val_loss: 7.4879e-05
Epoch 5/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 179ms/step - accuracy: 1.0000 - loss: 1.4693e-04 - val_accuracy: 1.0000 - val_loss: 6.5160e-05
Epoch 6/10
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 180ms/step - accuracy: 1.0000 - loss: 1.2398e-04 - val_accuracy: 1.00

In [12]:
# Score the test split. The result unpacks into the loss followed by the
# "accuracy" metric requested at compile time.
test_metrics = bilstm_model.evaluate(x=X_test, y=y_test)
test_loss, test_accuracy = test_metrics
print("Test Accuracy:", test_accuracy)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 60ms/step - accuracy: 1.0000 - loss: 1.5000e-04
Test Accuracy: 1.0


In [16]:
# Write the model to disk as a .keras file.
# NOTE(review): in the recorded run this cell's execution count (16) is lower
# than the training cell's (17), so the file on disk may predate the last
# fit() call -- re-run this cell after training to save the final weights.
model_path = "../models/bilstm_model.keras"
bilstm_model.save(model_path)


In [14]:
# Build a second model that shares the classifier's input but stops at the
# BiLSTM layer, so predict() returns that layer's output vectors instead of
# class probabilities.
bilstm_output = bilstm_model.get_layer("bilstm_layer").output
feature_extractor = Model(inputs=bilstm_model.input, outputs=bilstm_output)

# Smoke test on the first five training rows (recorded output shape: (5, 256)).
features_sample = feature_extractor.predict(X_train[:5])
print(features_sample.shape)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 284ms/step
(5, 256)
